From 42f5b1ae572019053549327d5232421fc3cb80ec Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 15:31:14 +0200 Subject: [PATCH 01/11] [post-release] in CODEGEN __init__.py, update cudacpp version from 1.00.00 to 1.00.01 --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index 224b3a25e0..7116bc7031 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -66,7 +66,7 @@ __author__ = 'Andrea Valassi' __email__ = 'andrea.valassi@cern.ch' - __version__ = (1,00,00) # NB the release infrastructure expects 1-digit major and 2-digit minor and patch versions (n,nn,nn) + __version__ = (1,00,01) # NB the release infrastructure expects 1-digit major and 2-digit minor and patch versions (n,nn,nn) minimal_mg5amcnlo_version = (3,6,0) maximal_mg5amcnlo_version = (1000,1000,1000) From 18ed066f7d1b097d6e76a6fafe01cb199b4cc859 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 15:26:36 +0200 Subject: [PATCH 02/11] [post-release] in CHANGELOG.md, add an Unreleased section for 1.00.01 --- .../CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md index 4fec2a607d..7ebe38f445 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md @@ -6,6 +6,14 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com). -------------------------------------------------------------------------------- +## [Unreleased] - 2024-10-03 + +### Changed + +- Updated cudacpp version to 1.00.01. 
+ +-------------------------------------------------------------------------------- + ## [1.00.00] - 2024-10-03 ### Added @@ -35,6 +43,7 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com). -------------------------------------------------------------------------------- [1.00.00]: https://github.com/madgraph5/madgraph4gpu/releases/tag/cudacpp_for3.6.0_v1.00.00 +[Unreleased]: https://github.com/madgraph5/madgraph4gpu/compare/cudacpp_for3.6.0_v1.00.00...HEAD [#601]: https://github.com/madgraph5/madgraph4gpu/issues/601 [#846]: https://github.com/madgraph5/madgraph4gpu/issues/846 From e80938b37e951ab745d3b9eaafb1d41c756587b0 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 18:07:46 +0200 Subject: [PATCH 03/11] [post-release] in CODEGEN __init__.py, replace (1,00,01) by (1,0,1) as leading zeros in decimal integer literals are not permitted (#1013) --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py index 7116bc7031..3123240fbd 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py @@ -66,7 +66,11 @@ __author__ = 'Andrea Valassi' __email__ = 'andrea.valassi@cern.ch' - __version__ = (1,00,01) # NB the release infrastructure expects 1-digit major and 2-digit minor and patch versions (n,nn,nn) + # Plugin version (major,minor,patch) where major>=1, 0<=minor<=99 and 0<=patch<=99 + # The release infrastructure expects 'vN.NN.NN' tags with 1-digit major and 2-digit minor and patch versions + # and it takes care of converting the python tuple '(1,0,1)' into a version string 'v1.00.01' + # NB! 
Do not use '(1,00,01)' here: leading zeros in decimal integer literals are not permitted in python (#1013) + __version__ = (1,0,1) minimal_mg5amcnlo_version = (3,6,0) maximal_mg5amcnlo_version = (1000,1000,1000) From 84dfc5b36443e1c9b8ee10e779bcf4bcb206baba Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 18:15:24 +0200 Subject: [PATCH 04/11] [post-release] fix archiver.sh and gitTag.sh to produce '1.00.01' from input '(1,0,1)' in __init__.py (issue #1013) --- .github/workflows/archiver.sh | 11 ++++++++++- .github/workflows/archiver.yml | 4 ++-- epochX/cudacpp/gitTag.sh | 12 +++++++++--- 3 files changed, 21 insertions(+), 6 deletions(-) diff --git a/.github/workflows/archiver.sh b/.github/workflows/archiver.sh index 41f3a7b63c..5fea8d5fb5 100755 --- a/.github/workflows/archiver.sh +++ b/.github/workflows/archiver.sh @@ -26,8 +26,17 @@ mkdir ${outdir} outfile=${outdir}/VERSION.txt touch ${outfile} dateformat='%Y-%m-%d_%H:%M:%S UTC' +cudacpp_major=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $1}') +cudacpp_minor=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $2}') +cudacpp_patch=$(cat __init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $3}') +###echo "(From CUDACPP_OUTPUT/__init__.py)" +###echo "cudacpp (major, minor, patch) = ( ${cudacpp_major}, ${cudacpp_minor}, ${cudacpp_patch} )" +if [ ${cudacpp_major} -lt 0 ] || [ ${cudacpp_major} -gt 99 ]; then echo "ERROR! cudacpp_major is not in the [0,99] range"; exit 1; fi +if [ ${cudacpp_minor} -lt 0 ] || [ ${cudacpp_minor} -gt 99 ]; then echo "ERROR! cudacpp_minor is not in the [0,99] range"; exit 1; fi +if [ ${cudacpp_patch} -lt 0 ] || [ ${cudacpp_patch} -gt 99 ]; then echo "ERROR! 
cudacpp_patch is not in the [0,99] range"; exit 1; fi +cudacpp_version=$(printf "%1d.%02d.%02d" ${cudacpp_major} ${cudacpp_minor} ${cudacpp_patch}) echo "(From CUDACPP_OUTPUT/__init__.py)" >> ${outfile} -echo "cudacpp_version = $(cat __init__.py | awk '/__version__/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile} +echo "cudacpp_version = ${cudacpp_version}" >> ${outfile} echo "mg5_version_minimal = $(cat __init__.py | awk '/minimal_mg5amcnlo_version/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile} echo "mg5_version_latest_validated = $(cat __init__.py | awk '/latest_validated_version/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g')" >> ${outfile} echo "" >> ${outfile} diff --git a/.github/workflows/archiver.yml b/.github/workflows/archiver.yml index dd2127ffc2..9266c57528 100644 --- a/.github/workflows/archiver.yml +++ b/.github/workflows/archiver.yml @@ -13,8 +13,8 @@ on: push: tags: - # Include version tags such as 'cudacpp_for3.6.0_v1.0.0' or 'cudacpp_for3.6.0_v1.0.0_test001' - # Include version tags such as 'valassi_cudacpp_for3.6.0_v1.0.0' or 'valassi_cudacpp_for3.6.0_v1.0.0_test001' + # Include version tags such as 'cudacpp_for3.6.0_v1.00.00' or 'cudacpp_for3.6.0_v1.00.00_test001' + # Include version tags such as 'valassi_cudacpp_for3.6.0_v1.00.00' or 'valassi_cudacpp_for3.6.0_v1.00.00_test001' - '*cudacpp_for*_v*' # Exclude running tags such as 'cudacpp_for3.6.0_latest' diff --git a/epochX/cudacpp/gitTag.sh b/epochX/cudacpp/gitTag.sh index 7506fc3abc..db5dd36b61 100755 --- a/epochX/cudacpp/gitTag.sh +++ b/epochX/cudacpp/gitTag.sh @@ -22,8 +22,8 @@ function usage() { echo "Usage (1): $0 [-f] " echo "Creates a new version tag (from the HEAD of the local branch) and pushes it to the remote repository" - echo "Valid formats for are 'n1.n2.n3' or 'n1.n2.n3_txt' where txt only contains letters or digits" - echo "Version number 'n1.n2.n3' must match that in the CUDACPP_OUTPUT/__init__.py file" + echo "Valid 
formats for are 'n.nn.nn' or 'n.nn.nn_txt' where txt only contains letters or digits)" + echo "Version number must match the (n1,n2,n3) specified with single digits in the CUDACPP_OUTPUT/__init__.py file" echo "For release tags (no trailing '_txt'), the github CI will then create also a running tag with '_latest' suffix" echo "Use the -f option to delete and recreate a version tag that already exists" echo "" @@ -156,7 +156,13 @@ else # Determine cudacpp_version (as in archiver.sh) echo "INFO: determine cudacpp and mg5amc versions" - cudacpp_version=$(cat ${topdir}/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | awk '/__version__/{print $3}' | sed 's/(//' | sed 's/)//' | sed 's/,/./g') + cudacpp_major=$(cat ${topdir}/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $1}') + cudacpp_minor=$(cat ${topdir}/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $2}') + cudacpp_patch=$(cat ${topdir}/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/__init__.py | grep __version__ | sed -r 's/(.*=|\(|\)|,)/ /g' | awk '{print $3}') + if [ ${cudacpp_major} -lt 0 ] || [ ${cudacpp_major} -gt 99 ]; then echo "ERROR! cudacpp_major is not in the [0,99] range"; exit 1; fi + if [ ${cudacpp_minor} -lt 0 ] || [ ${cudacpp_minor} -gt 99 ]; then echo "ERROR! cudacpp_minor is not in the [0,99] range"; exit 1; fi + if [ ${cudacpp_patch} -lt 0 ] || [ ${cudacpp_patch} -gt 99 ]; then echo "ERROR! 
cudacpp_patch is not in the [0,99] range"; exit 1; fi + cudacpp_version=$(printf "%1d.%02d.%02d" ${cudacpp_major} ${cudacpp_minor} ${cudacpp_patch}) echo "> cudacpp_version = $cudacpp_version" # Determine mg5_version (as in HEPToolInstaller.py) From be85363b66a20d767d2cbc068afe91374345d394 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 19:00:57 +0200 Subject: [PATCH 05/11] [post-release] in CHANGELOG.md, document the fixes for #1013 --- .../cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md index 7ebe38f445..26b0d0567f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md @@ -12,6 +12,11 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com). - Updated cudacpp version to 1.00.01. +### Fixed + +- Infrastructure issues + - AV ([#1013]) Fix release scripts to create 'v1.00.01' tags from a '(1,0,1)' python tuple. + -------------------------------------------------------------------------------- ## [1.00.00] - 2024-10-03 @@ -52,3 +57,4 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com). 
[#959]: https://github.com/madgraph5/madgraph4gpu/issues/959 [#993]: https://github.com/madgraph5/madgraph4gpu/issues/993 [#1011]: https://github.com/madgraph5/madgraph4gpu/issues/1011 +[#1013]: https://github.com/madgraph5/madgraph4gpu/issues/1013 From dbb99400d3e18ef89b79db534657a6618b0e3b45 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 16:16:25 +0300 Subject: [PATCH 06/11] [amd] in gg_tt.mad and CODEGEN, workaround for FPE #1011 in vxxxxx on HIP: replace "pvec0 / ( vmass * pp )" by "pvec0 / vmass / pp" --- .../CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h | 5 ++++- epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h | 5 ++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h index 95ffb65cd0..fcfc4b3153 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/aloha/template_files/gpu/helas.h @@ -451,7 +451,10 @@ } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h index 561a125384..febf1dcf42 100644 --- a/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) From 13ebdbe63572a33ebc41a14e38533160ad45ba63 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Thu, 3 Oct 2024 15:28:54 +0200 Subject: [PATCH 07/11] [amd] in CHANGELOG.md, document the workaround for FPE #1011 on HIP --- epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md index 26b0d0567f..dcb7de5b1f 100644 --- a/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md +++ b/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/CHANGELOG.md @@ -14,6 +14,9 @@ The format is loosely based on [Keep a Changelog](https://keepachangelog.com). ### Fixed +- Platform-specific issues + - AV ([#1011]) Added workaround for Floating Point Exceptions in vxxxxx in the HIP backend. + - Infrastructure issues - AV ([#1013]) Fix release scripts to create 'v1.00.01' tags from a '(1,0,1)' python tuple. 
From dcf3a99788648d263b3b7c76425a52dafc724675 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 4 Oct 2024 08:22:53 +0200 Subject: [PATCH 08/11] [amd] regenerate all processes with the workaround for HIP FPE #1011 --- .../ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt | 16 +++++++------- epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h | 5 ++++- .../CODEGEN_cudacpp_ee_mumu_log.txt | 16 +++++++------- epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h | 5 ++++- .../gg_tt.mad/CODEGEN_mad_gg_tt_log.txt | 12 +++++----- .../gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt | 10 ++++----- epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h | 5 ++++- .../gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt | 20 ++++++++--------- epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h | 5 ++++- .../gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt | 20 ++++++++--------- epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h | 5 ++++- .../gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt | 10 ++++----- epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h | 5 ++++- .../gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt | 16 +++++++------- epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h | 5 ++++- .../CODEGEN_cudacpp_gg_ttgg_log.txt | 14 ++++++------ epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h | 5 ++++- .../gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt | 16 +++++++------- epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h | 5 ++++- .../CODEGEN_cudacpp_gg_ttggg_log.txt | 16 +++++++------- epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h | 5 ++++- .../gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt | 20 ++++++++--------- epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h | 5 ++++- .../gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt | 12 +++++----- epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h | 5 ++++- .../CODEGEN_mad_heft_gg_bb_log.txt | 16 +++++++------- .../cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h | 5 ++++- .../CODEGEN_cudacpp_heft_gg_bb_log.txt | 14 ++++-------- .../cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h | 5 ++++- .../CODEGEN_mad_nobm_pp_ttW_log.txt | 22 +++++++++---------- .../src/HelAmps_sm_no_b_mass.h | 5 ++++- 
.../CODEGEN_mad_pp_tt012j_log.txt | 22 +++++++++---------- epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h | 5 ++++- .../CODEGEN_mad_smeft_gg_tttt_log.txt | 18 +++++++-------- .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h | 5 ++++- .../CODEGEN_cudacpp_smeft_gg_tttt_log.txt | 14 ++++++------ .../HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h | 5 ++++- .../CODEGEN_mad_susy_gg_t1t1_log.txt | 12 +++++----- .../susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h | 5 ++++- .../CODEGEN_cudacpp_susy_gg_t1t1_log.txt | 8 +++---- .../susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h | 5 ++++- .../CODEGEN_mad_susy_gg_tt_log.txt | 12 +++++----- .../susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h | 5 ++++- .../CODEGEN_cudacpp_susy_gg_tt_log.txt | 14 +++++------- .../susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h | 5 ++++- 45 files changed, 258 insertions(+), 202 deletions(-) diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 3e4b7a7f2c..30d3ffc088 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -58,7 +58,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006308317184448242  +DEBUG: model prefixing takes 0.006434440612792969  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.005 s +1 processes with 2 diagrams generated in 0.004 s Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -180,19 +180,19 @@ INFO: Finding symmetric diagrams for subprocess group epem_mupmum DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1552]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.070 s +Wrote files for 8 helas calls in 0.069 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.199 s +ALOHA: aloha creates 3 routines in 0.201 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.252 s +ALOHA: aloha creates 7 routines in 0.255 s FFV1 FFV1 FFV2 @@ -232,9 +232,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.042s -user 0m1.792s -sys 0m0.243s +real 0m2.097s +user 0m1.775s +sys 0m0.272s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h index f233eee768..18f664e0d1 100644 --- a/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt index 997b34d3d3..1858165757 100644 --- a/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.sa/CODEGEN_cudacpp_ee_mumu_log.txt @@ -58,7 +58,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006402015686035156  +DEBUG: model prefixing takes 0.0062215328216552734  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -150,7 +150,7 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. 
INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -169,13 +169,13 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. -Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s +Generated helas calls for 1 subprocesses (2 diagrams) in 0.003 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.265 s +ALOHA: aloha creates 4 routines in 0.267 s FFV1 FFV1 FFV2 @@ -194,7 +194,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. 
quit -real 0m0.659s -user 0m0.600s -sys 0m0.042s -Code generation completed in 0 seconds +real 0m0.781s +user 0m0.590s +sys 0m0.053s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h index f233eee768..18f664e0d1 100644 --- a/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/ee_mumu.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index 0ef2980778..0384ed0547 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006418943405151367  +DEBUG: model prefixing takes 0.0059719085693359375  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,12 +181,12 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1552]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.071 s +Wrote files for 10 helas calls in 0.072 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.153 s +ALOHA: aloha creates 2 routines in 0.150 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -226,9 +226,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m1.900s -user 0m1.626s -sys 0m0.264s +real 0m1.997s +user 0m1.613s +sys 0m0.278s Code generation completed in 2 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 26859444af..ada2d7b4a3 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006226539611816406  +DEBUG: model prefixing takes 0.006254673004150391  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -174,7 +174,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.143 s +ALOHA: aloha creates 2 routines in 0.144 s VVV1 FFV1 FFV1 @@ -189,7 +189,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.559s -user 0m0.480s -sys 0m0.044s +real 0m0.532s +user 0m0.478s +sys 0m0.045s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h index 561a125384..febf1dcf42 100644 --- a/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index 05eb1be921..3922a1c111 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006343364715576172  +DEBUG: model prefixing takes 0.006289482116699219  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -159,7 +159,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.020 s +1 processes with 16 diagrams generated in 0.019 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -201,8 +201,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1552]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.045 s -Wrote files for 46 helas calls in 0.191 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.042 s +Wrote files for 46 helas calls in 0.189 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -210,14 +210,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 
ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.326 s +ALOHA: aloha creates 5 routines in 0.338 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.306 s +ALOHA: aloha creates 10 routines in 0.311 s VVV1 VVV1 FFV1 @@ -265,10 +265,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.598s -user 0m2.282s -sys 0m0.314s -Code generation completed in 3 seconds +real 0m2.618s +user 0m2.304s +sys 0m0.310s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h index de2df9841e..ff9f0d7f00 100644 --- a/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_tt01g.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt index ef26d2703a..871e6fde69 100644 --- a/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.mad/CODEGEN_mad_gg_ttg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006256103515625  +DEBUG: model prefixing takes 0.0062618255615234375  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -151,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -180,8 +180,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxg DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1552]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.037 s -Wrote files for 36 helas calls in 0.120 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.123 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes 
ALOHA: aloha creates VVV1 routines @@ -189,14 +189,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.362 s +ALOHA: aloha creates 5 routines in 1.397 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: aloha creates 10 routines in 0.315 s VVV1 VVV1 FFV1 @@ -237,10 +237,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m2.525s -user 0m2.188s -sys 0m0.271s -Code generation completed in 3 seconds +real 0m3.568s +user 0m2.185s +sys 0m0.276s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h index de2df9841e..ff9f0d7f00 100644 --- a/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt index e2b1c58e67..c0f0ecac53 100644 --- a/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt +++ b/epochX/cudacpp/gg_ttg.sa/CODEGEN_cudacpp_gg_ttg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006231069564819336  +DEBUG: model prefixing takes 0.006242036819458008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -177,7 +177,7 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.330 s +ALOHA: aloha creates 5 routines in 0.326 s VVV1 VVV1 FFV1 @@ -197,7 +197,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m0.806s -user 0m0.706s -sys 0m0.070s +real 0m0.777s +user 0m0.714s +sys 0m0.058s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h index de2df9841e..ff9f0d7f00 100644 --- a/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttg.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) 
+ const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index e9a07aca5d..20192cdf8a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0064373016357421875  +DEBUG: model prefixing takes 0.006398916244506836  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -180,8 +180,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxgg DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 
1551]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1552]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.418 s -Wrote files for 222 helas calls in 0.663 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.426 s +Wrote files for 222 helas calls in 0.660 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -189,14 +189,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.331 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.312 s +ALOHA: aloha creates 10 routines in 0.314 s VVV1 VVV1 FFV1 @@ -240,9 +240,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about 
this process. quit -real 0m3.825s -user 0m3.498s -sys 0m0.267s +real 0m3.856s +user 0m3.505s +sys 0m0.295s Code generation completed in 4 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h index c173c49208..53dd560ed6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt index 8c54492115..641c68b009 100644 --- a/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.sa/CODEGEN_cudacpp_gg_ttgg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0061643123626708984  +DEBUG: model prefixing takes 0.006249666213989258  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -151,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.156 s +1 processes with 123 diagrams generated in 0.160 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -170,14 +170,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.420 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.427 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.320 s +ALOHA: aloha creates 5 routines in 0.325 s VVV1 VVV1 FFV1 @@ -200,7 +200,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. 
quit -real 0m1.515s -user 0m1.355s -sys 0m0.065s +real 0m1.529s +user 0m1.382s +sys 0m0.063s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h index c173c49208..53dd560ed6 100644 --- a/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttgg.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index 3bfe188383..4e8f48ed8b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006472587585449219  +DEBUG: model prefixing takes 0.00632476806640625  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -182,8 +182,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxggg DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 
192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 
392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 
592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 
792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 
40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 246, 286: 247, 287: 248, 288: 249, 289: 
250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 
450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 
650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 
1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py at line 1552]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.530 s -Wrote files for 2281 helas calls in 18.418 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.525 s +Wrote files for 2281 helas calls in 18.363 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -191,14 +191,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.369 s +ALOHA: aloha creates 5 routines in 0.361 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.311 s +ALOHA: 
aloha creates 10 routines in 0.312 s VVV1 VVV1 FFV1 @@ -242,9 +242,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. quit -real 0m32.534s -user 0m31.970s -sys 0m0.462s +real 0m32.585s +user 0m32.009s +sys 0m0.446s Code generation completed in 33 seconds ************************************************************ * * diff --git a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h index c173c49208..53dd560ed6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt index e615bf399b..c4b2d61a21 100644 --- a/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.sa/CODEGEN_cudacpp_gg_ttggg_log.txt @@ -58,7 +58,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006009101867675781  +DEBUG: model prefixing takes 0.006146430969238281  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -151,7 +151,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.880 s +1 processes with 1240 diagrams generated in 1.893 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -170,14 +170,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.548 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.631 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.346 s +ALOHA: aloha creates 5 routines in 0.351 s VVV1 VVV1 FFV1 @@ -200,7 +200,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. 
quit -real 0m13.133s -user 0m12.909s -sys 0m0.108s -Code generation completed in 13 seconds +real 0m13.234s +user 0m12.950s +sys 0m0.101s +Code generation completed in 14 seconds diff --git a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h index c173c49208..53dd560ed6 100644 --- a/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gg_ttggg.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 6e05d8fa66..b3ce4a6716 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -57,7 +57,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006659030914306641  +DEBUG: model prefixing takes 0.0063931941986083984  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,7 +166,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.082 s +8 processes with 40 diagrams generated in 0.078 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -214,17 +214,17 @@ INFO: Finding symmetric diagrams for subprocess group gux_ttxux DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1552]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.033 s -Wrote files for 32 helas calls in 0.179 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s +Wrote files for 32 helas calls in 0.163 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.150 s +ALOHA: aloha creates 2 routines in 0.145 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.132 s FFV1 FFV1 FFV1 @@ -270,10 +270,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.263s -user 0m1.939s -sys 0m0.315s -Code generation completed in 3 seconds +real 0m2.176s +user 0m1.872s +sys 0m0.303s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h index e2ea56740c..a304fc85c8 100644 --- a/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt index c3ab012992..6483e0d003 100644 --- a/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.sa/CODEGEN_cudacpp_gq_ttq_log.txt @@ -57,7 +57,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005918264389038086  +DEBUG: model prefixing takes 0.006114006042480469  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -166,7 +166,7 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. 
-8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.077 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -206,7 +206,7 @@ Generated helas calls for 2 subprocesses (10 diagrams) in 0.030 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.142 s +ALOHA: aloha creates 2 routines in 0.146 s FFV1 FFV1 FFV1 @@ -222,7 +222,7 @@ INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.666s -user 0m0.588s -sys 0m0.054s +real 0m0.934s +user 0m0.600s +sys 0m0.050s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h index e2ea56740c..a304fc85c8 100644 --- a/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h +++ b/epochX/cudacpp/gq_ttq.sa/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index 3e34bcb537..0ae7218027 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -123,7 +123,7 @@ Defined multiparticle all = g u c d s u~ c~ d~ s~ a ve vm vt e- mu- ve~ vm~ vt~ generate g g > b b~ HIW<=1 INFO: Trying process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Process has 4 diagrams -1 processes with 4 diagrams generated in 0.006 s +1 processes with 4 diagrams generated in 0.005 s Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -153,20 +153,20 @@ INFO: Finding symmetric diagrams for subprocess group gg_bbx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1552]  Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.078 s +Wrote files for 12 helas calls in 0.076 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.262 s +ALOHA: aloha creates 4 routines in 0.264 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.253 s +ALOHA: aloha creates 8 routines in 0.249 s VVS3 VVV1 FFV1 @@ -204,10 +204,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more 
information about this process. quit -real 0m2.135s -user 0m1.842s -sys 0m0.288s -Code generation completed in 3 seconds +real 0m2.343s +user 0m1.855s +sys 0m0.277s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h index 25b333b882..1b04401547 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_bb.mad/src/HelAmps_heft.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt index 522c7ba21a..78ac3c603d 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.sa/CODEGEN_cudacpp_heft_gg_bb_log.txt @@ -58,12 +58,6 @@ set auto_convert_model T save options auto_convert_model save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft -INFO: reload from .py file -INFO: load particles -INFO: load vertices -WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  -WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. 
Automatic computation of scale uncertainty can be wrong for such model.  -DEBUG: model prefixing takes 0.005913972854614258  INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -154,7 +148,7 @@ ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.269 s +ALOHA: aloha creates 4 routines in 0.260 s VVS3 VVV1 FFV1 @@ -171,7 +165,7 @@ INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.677s -user 0m0.624s -sys 0m0.045s +real 0m1.090s +user 0m0.571s +sys 0m0.062s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h index 25b333b882..1b04401547 100644 --- a/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h +++ b/epochX/cudacpp/heft_gg_bb.sa/src/HelAmps_heft.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index d042e84abb..1f74eb715f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -57,7 +57,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006539821624755859  +DEBUG: model prefixing takes 0.006134510040283203  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -181,7 +181,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.107 s +4 processes with 8 diagrams generated in 0.106 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. 
@@ -223,7 +223,7 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.644 s +12 processes with 144 diagrams generated in 0.648 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -351,19 +351,19 @@ INFO: Finding symmetric diagrams for subprocess group dux_ttxwm DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1552]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.200 s -Wrote files for 212 helas calls in 0.833 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.201 s +Wrote files for 212 helas calls in 0.838 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 3 routines in 0.202 s +ALOHA: aloha creates 3 routines in 0.203 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.204 s +ALOHA: aloha creates 6 routines in 0.200 s FFV1 FFV1 FFV1 @@ -459,10 +459,10 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m4.657s -user 0m4.116s -sys 0m0.520s -Code generation completed in 5 seconds +real 0m4.785s +user 0m4.104s +sys 0m0.539s +Code generation completed in 4 seconds ************************************************************ * * * W E L C O M E to * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h b/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h index 0bbfaa3e86..850b86e0e6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/src/HelAmps_sm_no_b_mass.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt index 2b1f5e5a25..4c20a350e7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt +++ b/epochX/cudacpp/pp_tt012j.mad/CODEGEN_mad_pp_tt012j_log.txt @@ -57,7 +57,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.006169557571411133  +DEBUG: model prefixing takes 0.006647348403930664  INFO: Restrict model sm with file models/sm/restrict_default.dat . 
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -168,7 +168,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.031 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. @@ -208,7 +208,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.142 s +13 processes with 76 diagrams generated in 0.146 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -374,7 +374,7 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. 
-65 processes with 1119 diagrams generated in 1.821 s +65 processes with 1119 diagrams generated in 1.946 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -689,8 +689,8 @@ INFO: Finding symmetric diagrams for subprocess group uux_ttx DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1552]  -Generated helas calls for 18 subprocesses (372 diagrams) in 1.275 s -Wrote files for 810 helas calls in 2.836 s +Generated helas calls for 18 subprocesses (372 diagrams) in 1.306 s +Wrote files for 810 helas calls in 2.776 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines @@ -698,14 +698,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.339 s +ALOHA: aloha creates 5 routines in 0.338 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.315 s +ALOHA: aloha creates 10 routines in 0.314 s VVV1 VVV1 FFV1 @@ -883,9 +883,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m10.516s -user 0m9.558s -sys 0m0.917s +real 0m10.704s +user 0m9.729s +sys 0m0.940s Code generation completed in 10 seconds ************************************************************ * * diff --git a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h index c173c49208..53dd560ed6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h +++ b/epochX/cudacpp/pp_tt012j.mad/src/HelAmps_sm.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt index 404e77951a..6d15da35b5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/CODEGEN_mad_smeft_gg_tttt_log.txt @@ -73,7 +73,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1275484561920166  +DEBUG: model prefixing takes 0.12750768661499023  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -88,7 +88,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.654 s +1 processes with 72 diagrams generated in 3.691 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -117,8 +117,8 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttxttx DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1527]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 
46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1552]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.185 s -Wrote files for 119 helas calls in 0.386 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.186 s +Wrote files for 119 helas calls in 0.384 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines @@ -126,14 +126,14 @@ ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.315 s +ALOHA: aloha creates 5 routines in 0.318 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.328 s +ALOHA: aloha creates 10 routines in 0.332 s VVV5 VVV5 FFV1 @@ -174,9 +174,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m7.080s -user 0m6.759s -sys 0m0.305s +real 0m7.131s +user 0m6.830s +sys 0m0.285s Code generation completed in 7 seconds ************************************************************ * * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h index 736342fc49..98fc59d3ea 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt index d114668f25..7e4394e2dd 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt +++ b/epochX/cudacpp/smeft_gg_tttt.sa/CODEGEN_cudacpp_smeft_gg_tttt_log.txt @@ -73,7 +73,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.12795209884643555  +DEBUG: model prefixing takes 0.1275796890258789  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -88,7 +88,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.734 s +1 processes with 72 diagrams generated in 3.713 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -107,14 +107,14 @@ INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TM FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory 
/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. -Generated helas calls for 1 subprocesses (72 diagrams) in 0.191 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.320 s VVV5 VVV5 FFV1 @@ -134,7 +134,7 @@ INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SME INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. quit -real 0m5.188s -user 0m5.079s +real 0m5.211s +user 0m5.033s sys 0m0.067s -Code generation completed in 6 seconds +Code generation completed in 5 seconds diff --git a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h index 736342fc49..98fc59d3ea 100644 --- a/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +++ b/epochX/cudacpp/smeft_gg_tttt.sa/src/HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index 4d16903643..90e13a925d 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -550,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.120 s +1 processes with 6 diagrams generated in 0.123 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -580,7 +580,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_t1t1x DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1552]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.009 s -Wrote files for 16 helas calls in 0.082 s +Wrote files for 16 helas calls in 0.081 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -591,7 +591,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 6 routines in 0.182 s +ALOHA: aloha creates 6 routines in 0.181 s VVV1 VSS1 VSS1 @@ -628,9 +628,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m3.035s -user 0m2.668s -sys 0m0.310s +real 0m2.979s +user 0m2.658s +sys 0m0.321s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h index 70b096b0ae..ec627d7759 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/src/HelAmps_MSSM_SLHA2.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt index b99ef955e1..853e6fc8f7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.sa/CODEGEN_cudacpp_susy_gg_t1t1_log.txt @@ -550,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.122 s +1 processes with 6 diagrams generated in 0.124 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -590,7 +590,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.322s -user 0m1.254s -sys 0m0.060s +real 0m1.331s +user 0m1.253s +sys 0m0.069s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h index 70b096b0ae..ec627d7759 100644 --- a/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_t1t1.sa/src/HelAmps_MSSM_SLHA2.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 57170a92d8..cfa8b980ff 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -550,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.117 s +1 processes with 3 diagrams generated in 0.124 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -580,7 +580,7 @@ INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1551]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1552]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.007 s -Wrote files for 10 helas calls in 0.074 s +Wrote files for 10 helas calls in 0.075 s DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 @@ -589,7 +589,7 @@ ALOHA: aloha creates 2 routines in 0.136 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.133 s +ALOHA: aloha creates 4 routines in 0.136 s VVV1 FFV1 FFV1 @@ -625,9 +625,9 @@ Type "launch" to generate events from this process, or see Run "open index.html" to see more information about this process. 
quit -real 0m2.841s -user 0m2.536s -sys 0m0.299s +real 0m3.046s +user 0m2.549s +sys 0m0.290s Code generation completed in 3 seconds ************************************************************ * * diff --git a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h index 9d249ac058..9ed58e24f1 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.mad/src/HelAmps_MSSM_SLHA2.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. ) diff --git a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt index 7aa094ccef..8f97de9855 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.sa/CODEGEN_cudacpp_susy_gg_tt_log.txt @@ -55,10 +55,6 @@ set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 -INFO: reload from .py file -INFO: load particles -INFO: load vertices -DEBUG: model prefixing takes 0.9066345691680908  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -554,7 +550,7 @@ INFO: Please specify coupling orders to bypass this step. 
INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.113 s +1 processes with 3 diagrams generated in 0.125 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Output will be done with PLUGIN: CUDACPP_OUTPUT @@ -577,7 +573,7 @@ Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.138 s +ALOHA: aloha creates 2 routines in 0.145 s VVV1 FFV1 FFV1 @@ -592,7 +588,7 @@ INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in dire INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. quit -real 0m2.362s -user 0m2.295s -sys 0m0.055s +real 0m1.357s +user 0m1.261s +sys 0m0.066s Code generation completed in 2 seconds diff --git a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h index 9d249ac058..9ed58e24f1 100644 --- a/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h +++ b/epochX/cudacpp/susy_gg_tt.sa/src/HelAmps_MSSM_SLHA2.h @@ -478,7 +478,10 @@ namespace mg5amcCpu } else { - const fptype emp = pvec0 / ( vmass * pp ); + //printf( "DEBUG1011 (before emp): pvec0=%f vmass=%f pp=%f vmass*pp=%f\n", pvec0, vmass, pp, vmass * pp ); + //const fptype emp = pvec / ( vmass * pp ); // this may give a FPE #1011 (why?! maybe when vmass=+-epsilon?) + const fptype emp = pvec0 / vmass / pp; // workaround for FPE #1011 + //printf( "DEBUG1011 (after emp): emp=%f\n", emp ); vc[2] = cxmake( hel0 * pp / vmass, 0. ); vc[5] = cxmake( hel0 * pvec3 * emp + hel * pt / pp * sqh, 0. ); if( pt != 0. 
) From 0ec8c1cb53c1197d416ccee4ceda5bd1f19d519f Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 4 Oct 2024 16:13:32 +0300 Subject: [PATCH 09/11] [amd] rerun 96 tput builds and tests on LUMI worker node (small-g 72h) with the workaround for HIP FPEs #1011 - now all tests succeed ./tput/allTees.sh -hip STARTED AT Fri 04 Oct 2024 09:31:32 AM EEST ./tput/teeThroughputX.sh -mix -hrd -makej -eemumu -ggtt -ggttg -ggttgg -gqttq -ggttggg -makeclean -nocuda ENDED(1) AT Fri 04 Oct 2024 10:33:14 AM EEST [Status=0] ./tput/teeThroughputX.sh -flt -hrd -makej -eemumu -ggtt -ggttgg -inlonly -makeclean -nocuda ENDED(2) AT Fri 04 Oct 2024 11:09:17 AM EEST [Status=0] ./tput/teeThroughputX.sh -makej -eemumu -ggtt -ggttg -gqttq -ggttgg -ggttggg -flt -bridge -makeclean -nocuda ENDED(3) AT Fri 04 Oct 2024 11:17:27 AM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -rmbhst -nocuda ENDED(4) AT Fri 04 Oct 2024 11:19:15 AM EEST [Status=0] SKIP './tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda' ENDED(5) AT Fri 04 Oct 2024 11:19:15 AM EEST [Status=0] ./tput/teeThroughputX.sh -eemumu -ggtt -ggttgg -flt -common -nocuda ENDED(6) AT Fri 04 Oct 2024 11:21:02 AM EEST [Status=0] ./tput/teeThroughputX.sh -mix -hrd -makej -susyggtt -susyggt1t1 -smeftggtttt -heftggbb -makeclean -nocuda ENDED(7) AT Fri 04 Oct 2024 11:53:25 AM EEST [Status=0] No errors found in logs No FPEs or '{ }' found in logs eemumu MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } eemumu MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } ggttggg MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } ggttggg MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } ggttgg MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 
3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } ggttgg MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } ggttg MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } ggttg MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } ggtt MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } ggtt MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } gqttq MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } gqttq MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } heftggbb MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } heftggbb MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } smeftggtttt MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } smeftggtttt MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } susyggt1t1 MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } susyggt1t1 MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } susyggtt MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } susyggtt MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } --- .../log_eemumu_mad_d_inl0_hrd0.txt | 258 +++++--------- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 276 ++++++--------- 
.../log_eemumu_mad_d_inl0_hrd0_common.txt | 244 +++++-------- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 261 +++++--------- .../log_eemumu_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_eemumu_mad_d_inl1_hrd0.txt | 258 +++++--------- .../log_eemumu_mad_d_inl1_hrd1.txt | 258 +++++--------- .../log_eemumu_mad_f_inl0_hrd0.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 284 ++++++--------- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 254 +++++--------- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 269 ++++++--------- .../log_eemumu_mad_f_inl0_hrd1.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl1_hrd0.txt | 268 ++++++--------- .../log_eemumu_mad_f_inl1_hrd1.txt | 268 ++++++--------- .../log_eemumu_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_eemumu_mad_m_inl0_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_d_inl0_hrd0.txt | 254 +++++--------- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 272 ++++++--------- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 240 +++++-------- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 257 +++++--------- .../log_ggtt_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_d_inl1_hrd0.txt | 254 +++++--------- .../log_ggtt_mad_d_inl1_hrd1.txt | 254 +++++--------- .../log_ggtt_mad_f_inl0_hrd0.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 290 ++++++---------- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 264 ++++++-------- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 275 ++++++--------- .../log_ggtt_mad_f_inl0_hrd1.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl1_hrd0.txt | 272 ++++++--------- .../log_ggtt_mad_f_inl1_hrd1.txt | 272 ++++++--------- .../log_ggtt_mad_m_inl0_hrd0.txt | 258 +++++--------- .../log_ggtt_mad_m_inl0_hrd1.txt | 258 +++++--------- .../log_ggttg_mad_d_inl0_hrd0.txt | 293 ++++++---------- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 315 +++++++---------- .../log_ggttg_mad_d_inl0_hrd1.txt | 293 ++++++---------- .../log_ggttg_mad_f_inl0_hrd0.txt | 301 +++++++--------- 
.../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----------- .../log_ggttg_mad_f_inl0_hrd1.txt | 301 +++++++--------- .../log_ggttg_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_ggttg_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 285 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 307 +++++++---------- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++++++--------- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 290 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd1.txt | 285 ++++++---------- .../log_ggttgg_mad_d_inl1_hrd0.txt | 289 ++++++---------- .../log_ggttgg_mad_d_inl1_hrd1.txt | 293 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 301 +++++++--------- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----------- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 295 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 306 +++++++---------- .../log_ggttgg_mad_f_inl0_hrd1.txt | 299 +++++++--------- .../log_ggttgg_mad_f_inl1_hrd0.txt | 297 +++++++--------- .../log_ggttgg_mad_f_inl1_hrd1.txt | 297 +++++++--------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_ggttgg_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 ++++--------- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 ++++---------- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 ++++--------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 ++++---------- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 +++++---------- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++---------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 ++++--------- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 ++++--------- .../log_gqttq_mad_d_inl0_hrd0.txt | 281 ++++++--------- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 303 ++++++---------- .../log_gqttq_mad_d_inl0_hrd1.txt | 281 ++++++--------- .../log_gqttq_mad_f_inl0_hrd0.txt | 297 +++++++--------- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 319 +++++++---------- 
.../log_gqttq_mad_f_inl0_hrd1.txt | 297 +++++++--------- .../log_gqttq_mad_m_inl0_hrd0.txt | 277 ++++++--------- .../log_gqttq_mad_m_inl0_hrd1.txt | 277 ++++++--------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 254 +++++--------- .../log_heftggbb_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 270 ++++++--------- .../log_heftggbb_mad_f_inl0_hrd1.txt | 272 ++++++--------- .../log_heftggbb_mad_m_inl0_hrd0.txt | 252 +++++--------- .../log_heftggbb_mad_m_inl0_hrd1.txt | 252 +++++--------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 281 ++++++--------- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 281 ++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 301 +++++++--------- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 301 +++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 281 ++++++--------- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 281 ++++++--------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 250 +++++--------- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 250 +++++--------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 262 ++++++-------- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 262 ++++++-------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 254 +++++--------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 258 +++++--------- .../log_susyggtt_mad_d_inl0_hrd1.txt | 254 +++++--------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 270 ++++++--------- .../log_susyggtt_mad_f_inl0_hrd1.txt | 270 ++++++--------- .../log_susyggtt_mad_m_inl0_hrd0.txt | 254 +++++--------- .../log_susyggtt_mad_m_inl0_hrd1.txt | 254 +++++--------- 96 files changed, 9508 insertions(+), 16674 deletions(-) diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index c3f0ed1d47..43da6e9aa5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,68 
+1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:23:05 -DATE: 2024-10-02_22:21:05 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.114935e+07 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 3.582761e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.939652e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.823338 sec -INFO: No Floating Point Exceptions have been reported - 2,781,829,840 cycles # 2.927 GHz - 4,278,879,817 instructions # 1.54 insn per cycle - 1.128949739 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.209600e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.872254e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.989444e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.535787 sec +INFO: No Floating Point Exceptions have been reported + 1,434,722,098 cycles:u # 2.644 GHz (74.58%) + 2,578,399 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.56%) + 6,866,717 stalled-cycles-backend:u # 0.48% backend cycles idle (75.30%) + 2,088,564,042 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (74.67%) + 0.599328986 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.072198e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.251574e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.251574e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.292206 sec -INFO: No Floating Point Exceptions have been reported - 19,188,263,570 cycles # 3.045 GHz - 46,171,187,745 instructions # 2.41 insn per cycle - 6.302411306 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.383707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.589135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589135e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.278206 sec +INFO: No Floating Point Exceptions have been reported + 17,739,462,314 cycles:u # 3.354 GHz (75.03%) + 50,106,117 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.04%) + 261,356,239 stalled-cycles-backend:u # 1.47% backend cycles idle (75.04%) + 47,091,390,697 instructions:u # 2.65 insn per cycle + # 0.01 stalled cycles per insn (75.04%) + 5.293316763 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.615174e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.112322e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.112322e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.331258 sec -INFO: No Floating Point Exceptions have been reported - 13,153,752,094 cycles # 3.031 GHz - 31,715,681,802 instructions # 2.41 insn per cycle - 4.341524872 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.029301e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.540119e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.540119e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.825031 sec +INFO: No Floating Point Exceptions have been reported + 12,681,894,597 cycles:u # 3.307 GHz (74.97%) + 50,229,914 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.97%) + 484,037,411 stalled-cycles-backend:u # 3.82% backend cycles idle (74.99%) + 31,763,793,252 instructions:u # 2.50 insn per cycle + # 0.02 stalled cycles per insn (74.99%) + 3.840009470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.026416e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.839154e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.839154e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.529215 sec -INFO: No Floating Point Exceptions have been reported - 10,251,997,224 cycles # 2.897 GHz - 19,667,313,704 instructions # 1.92 insn per cycle - 3.539347005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.799934e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765940e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765940e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.969501 sec +INFO: No Floating Point Exceptions have been reported + 9,679,661,163 cycles:u # 3.249 GHz (74.96%) + 49,712,980 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.03%) + 904,119,408 stalled-cycles-backend:u # 9.34% backend cycles idle (75.03%) + 19,500,860,421 instructions:u # 2.01 insn per cycle + # 0.05 stalled cycles per insn (75.03%) + 2.983989983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.051463e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.907164e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.907164e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.495119 sec -INFO: No Floating Point Exceptions have been reported - 10,162,863,648 cycles # 2.902 GHz - 19,355,102,855 instructions # 1.90 insn per cycle - 3.505408660 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.813583e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.421948e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.421948e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.895263 sec -INFO: No Floating Point Exceptions have been reported - 8,768,256,609 cycles # 2.246 GHz - 15,838,557,376 instructions # 1.81 insn per cycle - 3.905255721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index a59f4a8bf6..088a07a09d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:13:53 -DATE: 2024-10-02_22:59:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.721261e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.941229e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.941229e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.226356 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,271,743,384 cycles # 2.941 GHz - 12,922,647,058 instructions # 1.78 insn per cycle - 2.529249715 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.856473e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.614655e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.614655e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.520594 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,145,006,763 cycles:u # 3.284 GHz (75.07%) + 219,222,569 stalled-cycles-frontend:u # 1.21% frontend cycles idle (75.06%) + 6,752,190,970 stalled-cycles-backend:u # 37.21% backend cycles idle (75.01%) + 16,698,321,112 instructions:u # 0.92 insn per cycle + # 0.40 stalled cycles per insn (74.89%) + 5.592402423 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.036468e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202117e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202117e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.678078 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 20,324,172,184 cycles # 3.040 GHz - 46,315,699,520 instructions # 2.28 insn per cycle - 6.685452158 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.348917e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.547978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547978e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.516587 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,320,160,243 cycles:u # 3.308 GHz (74.99%) + 49,931,362 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.01%) + 393,514,300 stalled-cycles-backend:u # 2.15% backend cycles idle (75.02%) + 47,323,149,472 instructions:u # 2.58 insn per cycle + # 0.01 stalled cycles per insn (75.02%) + 5.542562977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.546402e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.989841e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.989841e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.681304 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 14,274,276,990 cycles # 3.045 GHz - 32,466,525,739 instructions # 2.27 insn per cycle - 4.688943771 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.953054e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.422114e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.422114e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.095942 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,397,410,567 cycles:u # 3.254 GHz (74.94%) + 52,373,136 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.94%) + 529,306,431 stalled-cycles-backend:u # 3.95% backend cycles idle (74.94%) + 32,573,951,196 instructions:u # 2.43 insn per cycle + # 0.02 stalled cycles per insn (74.98%) + 4.122057791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.906327e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.606772e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.606772e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.924044 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,408,077,664 cycles # 2.903 GHz - 20,951,332,123 instructions # 1.84 insn per cycle - 3.931555912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.673460e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.551032e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.551032e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.223521 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,320,162,030 cycles:u # 3.180 GHz (74.86%) + 40,080,497 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.92%) + 980,428,805 stalled-cycles-backend:u # 9.50% backend cycles idle (75.04%) + 20,354,090,333 instructions:u # 1.97 insn per cycle + # 0.05 stalled cycles per insn (75.10%) + 3.250249712 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.914575e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.618914e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.618914e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.912846 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 11,210,840,615 cycles # 2.861 GHz - 20,624,082,345 instructions # 1.84 insn per cycle - 3.920179017 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.699169e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.222592e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.222592e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.333799 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,037,060,432 cycles # 2.312 GHz - 16,902,306,877 instructions # 1.68 insn per cycle - 4.341202688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index 7ea35cfe0b..fca102346f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:19:20 -DATE: 2024-10-02_23:11:54 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.443145e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.507639e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.762000e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.192548e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.883371e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.001383e+07 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 1.336303 sec -INFO: No Floating Point Exceptions have been reported - 4,703,225,547 cycles # 3.001 GHz - 7,361,645,114 instructions # 1.57 insn per cycle - 
1.625770729 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 4.724775 sec +INFO: No Floating Point Exceptions have been reported + 15,402,138,829 cycles:u # 3.262 GHz (75.11%) + 153,815,583 stalled-cycles-frontend:u # 1.00% frontend cycles idle (75.03%) + 6,739,435,463 stalled-cycles-backend:u # 43.76% backend cycles idle (74.83%) + 11,546,188,546 instructions:u # 0.75 insn per cycle + # 0.58 stalled cycles per insn (74.83%) + 4.783944753 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.065605e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.242135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.242135e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.360739e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.563330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.563330e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 6.667816 sec -INFO: No Floating Point Exceptions have been reported - 20,174,215,158 cycles # 3.024 GHz - 46,194,433,450 instructions # 2.29 insn per cycle - 6.673472199 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.363109 sec +INFO: No Floating Point Exceptions have been reported + 17,972,582,951 cycles:u # 3.344 GHz (74.99%) + 49,074,506 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.99%) + 335,813,940 stalled-cycles-backend:u # 1.87% backend cycles idle (74.99%) + 47,138,026,721 instructions:u # 2.62 insn per cycle + # 0.01 stalled cycles per insn (75.00%) + 5.375753941 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.621083e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.116265e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.116265e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.030468e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.536582e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.536582e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.640534 sec -INFO: No Floating Point Exceptions have been reported - 14,164,511,867 cycles # 3.049 GHz - 31,624,566,458 instructions # 2.23 insn per cycle - 4.646256052 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.833998 sec +INFO: No Floating Point Exceptions have been reported + 12,664,916,265 cycles:u # 3.295 GHz (74.98%) + 50,300,295 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.04%) + 476,519,825 stalled-cycles-backend:u # 3.76% backend cycles idle (75.03%) + 31,722,956,771 instructions:u # 2.50 insn per cycle + # 0.02 stalled cycles per insn (75.03%) + 3.846513223 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.051763e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.893360e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.893360e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.795971e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.768024e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.768024e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.824965 sec -INFO: No Floating Point Exceptions have been reported - 11,267,126,218 cycles # 2.942 GHz - 19,489,192,245 instructions # 1.73 insn per cycle - 3.830677247 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +TOTAL : 2.979520 sec +INFO: No Floating Point Exceptions have been reported + 9,697,692,431 cycles:u # 3.243 GHz (74.87%) + 42,073,971 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.87%) + 927,318,016 stalled-cycles-backend:u # 9.56% backend cycles idle (75.00%) + 19,480,752,660 instructions:u # 2.01 insn per cycle + # 0.05 stalled cycles per insn (75.12%) + 2.991989434 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.087818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.945247e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.945247e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.778924 sec -INFO: No Floating Point Exceptions have been reported - 11,081,632,446 cycles # 2.929 GHz - 18,949,715,150 instructions # 1.71 insn per cycle - 3.784626146 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.831176e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.441760e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.441760e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.203108 sec -INFO: No Floating Point Exceptions have been reported - 9,786,254,295 cycles # 2.326 GHz - 15,455,384,623 instructions # 1.58 insn per cycle - 4.208912505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 9b9fa89512..090b5c3f6a 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:17:32 -DATE: 2024-10-02_23:06:27 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.089648e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.586443e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.750079e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.885226 sec -INFO: No Floating Point Exceptions have been reported - 6,218,727,462 cycles # 2.936 GHz - 11,582,485,978 instructions # 1.86 insn per cycle - 2.174401796 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.128366e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.857659e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.974805e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.295975 sec +INFO: No Floating Point Exceptions have been reported + 17,592,799,954 cycles:u # 3.305 GHz (75.00%) + 182,786,945 stalled-cycles-frontend:u # 1.04% frontend cycles idle (75.03%) + 13,672,359 stalled-cycles-backend:u # 0.08% backend cycles idle (74.99%) + 15,972,251,030 instructions:u # 0.91 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 5.356420132 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.072872e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.252789e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.252789e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.263357 sec -INFO: No Floating Point Exceptions have been reported - 19,072,777,161 cycles # 3.043 GHz - 46,090,846,095 instructions # 2.42 insn per cycle - 6.269085049 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.385072e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.586518e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.586518e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.273997 sec +INFO: No Floating Point Exceptions have been reported + 17,681,732,061 cycles:u # 3.346 GHz (75.02%) + 50,430,308 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.95%) + 248,748,061 stalled-cycles-backend:u # 1.41% backend cycles idle (74.95%) + 47,188,437,752 instructions:u # 2.67 insn per cycle + # 0.01 stalled cycles per insn (74.96%) + 5.286644699 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.633315e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.140339e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.140339e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.255223 sec -INFO: No Floating Point Exceptions have been reported - 13,020,735,219 cycles # 3.057 GHz - 31,621,408,671 instructions # 2.43 insn per cycle - 4.260978065 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.008224e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.507631e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.507631e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.869010 sec +INFO: No Floating Point Exceptions have been reported + 12,789,221,933 cycles:u # 3.296 GHz (74.85%) + 51,318,726 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.94%) + 502,548,872 stalled-cycles-backend:u # 3.93% backend cycles idle (75.04%) + 31,779,945,697 instructions:u # 2.48 insn per cycle + # 0.02 stalled cycles per insn (75.05%) + 3.881584459 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.046606e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.886962e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886962e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.469317 sec -INFO: No Floating Point Exceptions have been reported - 10,147,691,110 cycles # 2.921 GHz - 19,588,780,648 instructions # 1.93 insn per cycle - 3.475349152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.790772e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.759694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.759694e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.985788 sec +INFO: No Floating Point Exceptions have been reported + 9,705,175,626 cycles:u # 3.239 GHz (74.79%) + 42,542,630 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.79%) + 912,022,666 stalled-cycles-backend:u # 9.40% backend cycles idle (74.96%) + 19,486,481,816 instructions:u # 2.01 insn per cycle + # 0.05 stalled cycles per insn (75.09%) + 2.998243380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.050953e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.887703e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] 
(3a) = ( 2.887703e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.468623 sec -INFO: No Floating Point Exceptions have been reported - 9,922,328,760 cycles # 2.860 GHz - 19,251,488,263 instructions # 1.94 insn per cycle - 3.474417423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.831827e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.445212e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.445212e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.835346 sec -INFO: No Floating Point Exceptions have been reported - 8,636,609,147 cycles # 2.250 GHz - 15,756,094,199 instructions # 1.82 insn per cycle - 3.841169289 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index c7621e6788..14093880fb 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:23:21 -DATE: 2024-10-02_22:21:36 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.819349e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.631215e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.787548e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.671095 sec -INFO: No Floating Point Exceptions have been reported - 2,685,503,883 cycles # 2.965 GHz - 4,130,554,866 instructions # 1.54 insn per cycle - 0.966696272 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.484097e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.422933e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.563069e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.511661 sec +INFO: No Floating Point Exceptions have been reported + 1,398,188,345 cycles:u # 2.638 GHz (75.61%) + 2,461,273 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.89%) + 5,591,505 stalled-cycles-backend:u # 0.40% backend cycles idle (73.90%) + 2,145,158,950 instructions:u # 1.53 insn per cycle + # 0.00 stalled cycles per insn (73.31%) + 0.575076711 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.052130e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.226989e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.226989e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.412537 sec -INFO: No Floating Point Exceptions have been reported - 19,391,019,124 cycles # 3.020 GHz - 46,154,292,436 instructions # 2.38 insn per cycle - 6.422732999 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.382030e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.584869e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584869e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.282948 sec +INFO: No Floating Point Exceptions have been reported + 17,777,735,792 cycles:u # 3.359 GHz (74.92%) + 49,448,707 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) + 832,744,629 stalled-cycles-backend:u # 4.68% backend cycles idle (75.01%) + 46,714,050,600 instructions:u # 2.63 insn per cycle + # 0.02 stalled cycles per insn (75.07%) + 5.298501325 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 489) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.588098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.081645e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.081645e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.407881 sec -INFO: No Floating Point Exceptions have been reported - 13,105,876,007 cycles # 2.967 GHz - 31,645,255,458 instructions # 2.41 insn per cycle - 4.418072899 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.004010e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.485647e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.485647e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.870864 sec +INFO: No Floating Point Exceptions have been reported + 12,819,717,718 cycles:u # 3.303 GHz (74.92%) + 50,607,851 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.86%) + 359,477,038 stalled-cycles-backend:u # 2.80% backend cycles idle (74.96%) + 31,507,091,856 instructions:u # 2.46 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 3.885734591 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1605) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.035425e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.856170e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.856170e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.514751 sec -INFO: No Floating Point Exceptions have been reported - 10,258,432,986 cycles # 2.911 GHz - 19,657,134,826 instructions # 1.92 insn per cycle - 3.524456549 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1894) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.740409e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.654964e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.654964e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.022461 sec +INFO: No Floating Point Exceptions have been reported + 9,864,809,022 cycles:u # 3.253 GHz (74.94%) + 50,075,148 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.94%) + 293,036,909 stalled-cycles-backend:u # 2.97% backend cycles idle (74.96%) + 19,443,790,175 instructions:u # 1.97 insn per cycle + # 0.02 stalled cycles per insn (74.96%) + 3.037197737 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.060342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.905129e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.905129e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.482974 sec -INFO: No Floating Point Exceptions have been reported - 10,093,367,565 cycles # 2.892 GHz - 19,361,669,894 instructions # 1.92 insn per cycle - 3.493075437 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.838118e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.475808e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.475808e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.849198 sec -INFO: No Floating Point Exceptions have been reported - 8,644,950,079 cycles # 2.241 GHz - 15,672,088,510 instructions # 1.81 insn per cycle - 3.859415675 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 833) (512y: 153) (512z: 1240) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 54eb09f988..7fd5ea321f 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:04:26 -DATE: 2024-10-02_22:50:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.126115e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.578363e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.801387e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.690273 sec -INFO: No Floating Point Exceptions have been reported - 2,735,433,860 cycles # 2.950 GHz - 4,273,045,275 instructions # 1.56 insn per cycle - 0.985887175 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.206650e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.859077e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.975637e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.533245 sec +INFO: No Floating Point Exceptions have been reported + 1,420,329,016 cycles:u # 2.584 GHz (76.76%) + 2,497,014 stalled-cycles-frontend:u # 0.18% frontend cycles idle (76.03%) + 12,053,500 stalled-cycles-backend:u # 0.85% backend cycles idle (75.55%) + 2,285,520,867 instructions:u # 1.61 insn per cycle + # 0.01 stalled cycles per insn (74.52%) + 0.596098577 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165208E-002 +Relative difference = 1.0277079981222336e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.661112e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.136857e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.136857e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.214528 sec -INFO: No Floating Point Exceptions have been reported - 12,808,005,477 cycles # 3.033 GHz - 32,654,262,253 instructions # 2.55 insn per cycle - 4.225073741 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.919696e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.340607e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340607e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.018353 sec +INFO: No Floating Point Exceptions have been reported + 13,262,039,050 cycles:u # 3.291 GHz (75.00%) + 32,793,171 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.99%) + 186,423,621 stalled-cycles-backend:u # 1.41% backend cycles idle (74.99%) + 36,897,329,957 instructions:u # 2.78 insn per cycle + # 0.01 stalled cycles per insn (74.91%) + 4.034355011 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.051696e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.918485e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.918485e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.496269 sec -INFO: No Floating Point Exceptions have been reported - 10,653,047,507 cycles # 3.039 GHz - 24,982,853,721 instructions # 2.35 insn per cycle - 3.507179313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.640706e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.573372e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.573372e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.111451 sec +INFO: No Floating Point Exceptions have been reported + 10,154,741,768 cycles:u # 3.252 GHz (74.92%) + 49,697,741 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.92%) + 89,970,819 stalled-cycles-backend:u # 0.89% backend cycles idle (74.90%) + 24,422,576,739 instructions:u # 2.41 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 3.126925503 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.258708e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344293e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344293e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.213344 sec -INFO: No Floating Point Exceptions have been reported - 9,339,985,820 cycles # 2.898 GHz - 16,922,939,045 instructions # 1.81 insn per cycle - 3.223888003 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1599) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.230451e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.583770e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.583770e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.678075 sec +INFO: No Floating Point Exceptions have been reported + 8,614,135,245 cycles:u # 3.203 GHz (74.99%) + 51,623,769 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.02%) + 111,017,559 stalled-cycles-backend:u # 1.29% backend cycles idle (75.02%) + 16,851,748,589 instructions:u # 1.96 insn per cycle + # 0.01 stalled cycles per insn (75.02%) + 2.694227101 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2981) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.344116e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.474330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.474330e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.104706 sec -INFO: No Floating Point Exceptions have been reported - 9,100,480,389 cycles # 2.922 GHz - 16,469,426,004 instructions # 1.81 insn per cycle - 3.115374973 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1355) (512y: 139) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.035984e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.833687e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.833687e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.516318 sec -INFO: No Floating Point Exceptions have been reported - 8,033,525,618 cycles # 2.278 GHz - 14,639,859,340 instructions # 1.82 insn per cycle - 3.527113937 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 946) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 28c6ef0de9..78c37947fa 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:04:40 -DATE: 2024-10-02_22:50:57 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.262862e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.524016e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.778808e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.681785 sec -INFO: No Floating Point Exceptions have been reported - 2,742,251,071 cycles # 2.977 GHz - 4,303,655,049 instructions # 1.57 insn per cycle - 0.980574806 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.487887e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.405993e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.545751e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.517724 sec +INFO: No Floating Point Exceptions have been reported + 1,408,399,442 cycles:u # 2.627 GHz (74.60%) + 2,508,628 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.69%) + 5,356,088 stalled-cycles-backend:u # 0.38% backend cycles idle (75.20%) + 2,221,238,384 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.71%) + 0.576331891 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165201E-002 -Relative difference = 1.0277080522138477e-08 +Avg ME (F77/GPU) = 1.2828039868165216E-002 +Relative difference = 1.0277079305077159e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.161225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.040754e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.040754e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.335829 sec -INFO: No Floating Point Exceptions have been reported - 10,146,617,229 cycles # 3.033 GHz - 25,589,254,913 instructions # 2.52 insn per cycle - 3.346659723 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.697234e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.584139e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.584139e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.070213 sec +INFO: No Floating Point Exceptions have been reported + 9,981,745,626 cycles:u # 3.239 GHz (75.04%) + 49,772,542 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.08%) + 53,623,611 stalled-cycles-backend:u # 0.54% backend cycles idle (74.96%) + 28,300,840,364 instructions:u # 2.84 insn per cycle + # 0.00 stalled cycles per insn (74.96%) + 3.086278569 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 609) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.389684e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.653493e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.653493e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.061315 sec -INFO: No Floating Point Exceptions have been reported - 9,297,564,398 cycles # 3.028 GHz - 21,628,602,982 instructions # 2.33 insn per cycle - 3.072141619 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.951826e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.163421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163421e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.864877 sec +INFO: No Floating Point Exceptions have been reported + 9,264,715,688 cycles:u # 3.221 GHz (74.97%) + 49,378,464 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.97%) + 48,538,201 stalled-cycles-backend:u # 0.52% backend cycles idle (74.99%) + 21,312,934,455 instructions:u # 2.30 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 2.881181621 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2070) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.460349e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.734760e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.734760e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.978841 sec -INFO: No Floating Point Exceptions have been reported - 8,745,360,906 cycles # 2.926 GHz - 16,041,491,471 instructions # 1.83 insn per cycle - 2.989532515 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.453250e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.057430e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.057430e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.551172 sec +INFO: No Floating Point Exceptions have been reported + 8,156,601,641 cycles:u # 3.183 GHz (74.95%) + 48,682,113 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.03%) + 51,867,280 stalled-cycles-backend:u # 0.64% backend cycles idle (75.03%) + 15,737,675,973 instructions:u # 1.93 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 2.566825767 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2739) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 +Avg ME (F77/C++) = 1.2828039868165086E-002 +Relative difference = 1.0277089447254817e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.476083e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.781435e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.781435e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 2.970273 sec -INFO: No Floating Point Exceptions have been reported - 8,587,107,250 cycles # 2.881 GHz - 15,647,403,648 instructions # 1.82 insn per cycle - 2.981139555 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.122558e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.018467e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.018467e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.391235 sec -INFO: No Floating Point Exceptions have been reported - 7,801,685,793 cycles # 2.294 GHz - 14,376,558,537 instructions # 1.84 insn per cycle - 3.401770423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1031) (512y: 164) (512z: 876) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165088E-002 -Relative difference = 1.0277089312025782e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index c7851bae9b..e3dd1c6d17 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:23:37 -DATE: 2024-10-02_22:22:06 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.333916e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.720978e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.674302e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.576041 sec -INFO: No Floating Point Exceptions have been reported - 2,377,343,527 cycles # 2.962 GHz - 3,703,505,222 instructions # 1.56 insn per cycle - 0.861388802 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process 
= SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.415059e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.154679e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333976e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.394459 sec +INFO: No Floating Point Exceptions have been reported + 1,037,476,022 cycles:u # 2.548 GHz (74.87%) + 2,409,202 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.64%) + 7,243,116 stalled-cycles-backend:u # 0.70% backend cycles idle (75.53%) + 2,070,988,901 instructions:u # 2.00 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 0.451216224 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.109379e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.311359e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.311359e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.036148 sec -INFO: No Floating Point Exceptions have been reported - 18,304,223,591 cycles # 3.030 GHz - 45,024,500,068 instructions # 2.46 insn per cycle - 6.042994691 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.630698e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914703e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914703e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.530429 sec +INFO: No Floating Point Exceptions have been reported + 15,220,726,582 cycles:u # 3.354 GHz (74.97%) + 39,030,379 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.97%) + 461,472,431 stalled-cycles-backend:u # 3.03% backend cycles idle (74.97%) + 47,145,457,833 instructions:u # 3.10 insn per cycle + # 0.01 stalled cycles per insn (74.98%) + 4.542602349 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.299446e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533279e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533279e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.114429 sec -INFO: No Floating Point Exceptions have been reported - 9,418,027,973 cycles # 3.018 GHz - 22,310,907,211 instructions # 2.37 insn per cycle - 3.122195191 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.196871e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.565237e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.565237e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.643500 sec +INFO: No Floating Point Exceptions have been reported + 8,592,908,878 cycles:u # 3.242 GHz (74.95%) + 38,376,427 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.95%) + 1,214,006,248 stalled-cycles-backend:u # 14.13% backend cycles idle (74.95%) + 22,479,795,547 instructions:u # 2.62 insn per cycle + # 0.05 stalled cycles per insn (74.97%) + 2.655199075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.483873e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.823583e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.823583e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.905968 sec -INFO: No Floating Point Exceptions have been reported - 8,476,323,738 cycles # 2.911 GHz - 15,781,236,641 instructions # 1.86 insn per cycle - 2.913223219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.534852e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.157819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.157819e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.450189 sec +INFO: No Floating Point Exceptions have been reported + 7,924,483,978 cycles:u # 3.225 GHz (74.96%) + 41,053,771 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.93%) + 1,736,678,490 stalled-cycles-backend:u # 21.92% backend cycles idle (74.93%) + 15,506,768,997 instructions:u # 1.96 insn per cycle + # 0.11 stalled cycles per insn (74.95%) + 2.461796003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.502978e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.888551e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.888551e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.881646 sec -INFO: No Floating Point Exceptions have been reported - 8,393,499,476 cycles # 2.906 GHz - 15,616,953,644 instructions # 1.86 insn per cycle - 2.888818844 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.545557e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.922524e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.922524e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.843212 sec -INFO: No Floating Point Exceptions have been reported - 6,718,315,669 cycles # 2.359 GHz - 12,888,229,695 instructions # 1.92 insn per cycle - 2.850457369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 407af2f83c..9bf252161c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:14:14 -DATE: 2024-10-02_23:00:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.245423e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.983473e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.983473e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.688744 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,642,999,290 cycles # 2.936 GHz - 10,214,524,122 instructions # 1.81 insn per cycle - 1.977586864 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.260949e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091655e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091655e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.283278 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,560,883,148 cycles:u # 3.310 GHz (75.05%) + 112,550,536 stalled-cycles-frontend:u # 0.64% frontend cycles idle (75.07%) + 6,690,366,957 stalled-cycles-backend:u # 38.10% backend cycles idle (74.97%) + 16,597,038,187 instructions:u # 0.95 insn per cycle + # 0.40 stalled cycles per insn (74.88%) + 5.344260955 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.094603e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.288157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.288157e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.221630 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,928,122,768 cycles # 3.040 GHz - 45,157,983,866 instructions # 2.39 insn per cycle - 6.228889536 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.611734e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.891508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.891508e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.641800 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,468,183,515 cycles:u # 3.323 GHz (74.91%) + 38,886,191 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.95%) + 469,541,355 stalled-cycles-backend:u # 3.04% backend cycles idle (75.04%) + 47,266,056,863 instructions:u # 3.06 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 4.659113969 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.221557e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.317309e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.317309e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.330129 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,084,607,792 cycles # 3.023 GHz - 23,610,389,165 instructions # 2.34 insn per cycle - 3.337223492 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.076386e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.328396e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.328396e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.798855 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,991,931,096 cycles:u # 3.198 GHz (74.97%) + 38,133,187 stalled-cycles-frontend:u # 0.42% frontend cycles idle (74.96%) + 1,257,710,731 stalled-cycles-backend:u # 13.99% backend cycles idle (74.84%) + 23,526,850,713 instructions:u # 2.62 insn per cycle + # 0.05 stalled cycles per insn (74.84%) + 2.816253896 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.383113e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593932e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593932e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.129082 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,216,055,332 cycles # 2.939 GHz - 16,874,105,782 instructions # 1.83 insn per cycle - 3.136137450 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.330822e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.832750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.832750e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.652366 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,497,901,482 cycles:u # 3.189 GHz (74.92%) + 41,697,449 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.06%) + 1,783,825,384 stalled-cycles-backend:u # 20.99% backend cycles idle (75.09%) + 16,496,010,163 instructions:u # 1.94 insn per cycle + # 0.11 stalled cycles per insn (75.09%) + 2.669683386 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe 
-p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.404313e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.669923e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.669923e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 3.107612 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,139,317,896 cycles # 2.935 GHz - 16,718,242,091 instructions # 1.83 insn per cycle - 3.114416427 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.422868e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.634285e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.634285e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 3.093334 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,456,855,936 cycles # 2.406 GHz - 14,072,286,974 instructions # 1.89 insn per cycle - 3.100340528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index 6e51eea5f0..fe3846c47c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:19:40 -DATE: 2024-10-02_23:12:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.219425e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.271393e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.274485e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 -TOTAL : 1.184237 sec -INFO: No Floating Point Exceptions have been reported - 4,211,023,602 cycles # 2.994 GHz - 6,711,358,986 instructions # 1.59 insn per cycle - 1.464824370 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.386487e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.203073e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.390321e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371906e-02 +- 3.274477e-06 ) GeV^0 +TOTAL : 4.569485 sec +INFO: No Floating Point Exceptions have been reported + 15,043,606,254 cycles:u # 3.295 GHz (74.98%) + 53,934,412 stalled-cycles-frontend:u # 0.36% frontend cycles idle (75.11%) + 6,692,579,126 stalled-cycles-backend:u # 44.49% backend cycles idle (75.07%) + 11,364,204,925 instructions:u # 0.76 insn per cycle + # 0.59 stalled cycles per insn (74.93%) + 4.621115624 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.108754e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.311552e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.311552e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.633877e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919079e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919079e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.372009 sec -INFO: No Floating Point Exceptions have been reported - 19,261,147,103 cycles # 3.021 GHz - 45,187,144,333 instructions # 2.35 insn per cycle - 6.377610836 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.526013 sec +INFO: No Floating Point Exceptions have been reported + 15,210,506,774 cycles:u # 3.356 GHz (74.94%) + 38,928,878 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) + 450,561,554 stalled-cycles-backend:u # 2.96% backend cycles idle (74.95%) + 47,190,129,181 instructions:u # 3.10 insn per cycle + # 0.01 stalled cycles per insn (74.99%) + 4.534902488 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.341796e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.585577e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.585577e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.168663e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.498737e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.498737e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.380098 sec -INFO: No Floating Point Exceptions have been reported - 10,320,148,878 cycles # 3.049 GHz - 22,354,637,694 instructions # 2.17 insn per cycle - 3.385562983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.665278 sec +INFO: No Floating Point Exceptions have been reported + 8,665,266,652 cycles:u # 3.243 GHz (74.87%) + 37,924,343 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.88%) + 1,195,884,985 stalled-cycles-backend:u # 13.80% backend cycles idle (74.96%) + 22,455,976,899 instructions:u # 2.59 insn per cycle + # 0.05 stalled cycles per insn (75.11%) + 2.674409242 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.489756e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.828537e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.828537e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.531288e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.151978e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.151978e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.219462 sec -INFO: No Floating Point Exceptions have been reported - 9,424,957,911 cycles # 2.923 GHz - 15,663,887,385 instructions # 1.66 insn per cycle - 3.224887660 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +TOTAL : 2.454561 sec +INFO: No Floating Point Exceptions have been reported + 7,934,402,650 cycles:u # 3.225 GHz (74.98%) + 40,876,284 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.98%) + 1,742,906,133 stalled-cycles-backend:u # 21.97% backend cycles idle (74.97%) + 15,484,194,069 instructions:u # 1.95 insn per cycle + # 0.11 stalled cycles per insn (74.97%) + 2.463723139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.514091e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.920313e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 3.920313e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.200138 sec -INFO: No Floating Point Exceptions have been reported - 9,405,049,933 cycles # 2.935 GHz - 15,298,078,322 instructions # 1.63 insn per cycle - 3.205675908 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.575381e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.980148e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.980148e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.145944 sec -INFO: No Floating Point Exceptions have been reported - 7,690,829,828 cycles # 2.442 GHz - 12,573,137,118 instructions # 1.63 insn per cycle - 3.151480501 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index e41f96f72e..fce8e2dea5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:17:52 -DATE: 2024-10-02_23:06:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.214771e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.300228e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.215505e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.471162 sec -INFO: No Floating Point Exceptions have been reported - 5,070,897,985 cycles # 2.995 GHz - 9,257,924,094 instructions # 1.83 insn per cycle - 1.751258093 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 9.143752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.098317e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.310156e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 +TOTAL : 5.183411 sec +INFO: No Floating Point Exceptions have been reported + 17,254,799,796 cycles:u # 3.314 GHz (75.03%) + 113,518,720 stalled-cycles-frontend:u # 0.66% frontend cycles idle (75.06%) + 6,686,559,521 stalled-cycles-backend:u # 38.75% backend cycles idle (75.05%) + 16,253,572,458 instructions:u # 0.94 insn per cycle + # 0.41 stalled cycles per insn (75.07%) + 5.239982498 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.116110e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.316779e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.316779e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 5.995790 sec -INFO: No Floating Point Exceptions have been reported - 18,249,461,991 cycles # 3.042 GHz - 45,007,924,974 instructions # 2.47 insn per cycle - 6.001394527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.601054e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.884721e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.884721e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.615704 sec +INFO: No Floating Point Exceptions have been reported + 15,491,641,339 cycles:u # 3.352 GHz (74.92%) + 37,931,607 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.93%) + 518,295,353 stalled-cycles-backend:u # 3.35% backend cycles idle (75.01%) + 47,106,508,620 instructions:u # 3.04 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 4.624339853 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.333543e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.558339e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.558339e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.057214 sec -INFO: No Floating Point Exceptions have been reported - 9,287,290,653 cycles # 3.033 GHz - 22,273,732,814 instructions # 2.40 insn per cycle - 3.062726450 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.179057e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.517045e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.517045e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.659897 sec +INFO: No Floating Point Exceptions have been reported + 8,655,155,479 cycles:u # 3.246 GHz (74.83%) + 37,812,601 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.96%) + 1,186,940,794 stalled-cycles-backend:u # 13.71% backend cycles idle (75.10%) + 22,508,579,226 instructions:u # 2.60 insn per cycle + # 0.05 stalled cycles per insn (75.10%) + 2.668318748 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.502845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.836320e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.836320e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.876199 sec -INFO: No Floating Point Exceptions have been reported - 8,408,107,143 cycles # 2.919 GHz - 15,752,835,316 instructions # 1.87 insn per cycle - 2.881789095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.527163e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.155147e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.155147e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.457584 sec +INFO: No Floating Point Exceptions have been reported + 7,933,593,866 cycles:u # 3.220 GHz (75.00%) + 41,086,028 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.00%) + 1,740,967,447 stalled-cycles-backend:u # 21.94% backend cycles idle (75.00%) + 15,472,252,186 instructions:u # 1.95 insn per cycle + # 0.11 stalled cycles per insn (75.00%) + 2.466527154 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.499098e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.884933e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) 
= ( 3.884933e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.877505 sec -INFO: No Floating Point Exceptions have been reported - 8,358,416,525 cycles # 2.900 GHz - 15,588,323,205 instructions # 1.86 insn per cycle - 2.883031739 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= -INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.587399e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.988207e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.988207e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.795754 sec -INFO: No Floating Point Exceptions have been reported - 6,626,582,298 cycles # 2.366 GHz - 12,863,258,956 instructions # 1.94 insn per cycle - 2.801279409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052585973637E-002 -Relative difference = 2.0158743040564767e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 93cccb812d..181a08d9c8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:23:51 -DATE: 2024-10-02_22:22:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.343706e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.862423e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.018725e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.575938 sec -INFO: No Floating Point Exceptions have been reported - 2,392,010,928 cycles # 2.956 GHz - 3,674,427,647 instructions # 1.54 insn per cycle - 0.866892917 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.519289e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.667605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.910890e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.392354 sec +INFO: No Floating Point Exceptions have been reported + 1,045,582,005 cycles:u # 2.579 GHz (74.69%) + 2,411,477 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.64%) + 8,261,208 stalled-cycles-backend:u # 0.79% backend cycles idle (74.45%) + 2,082,907,116 instructions:u # 1.99 insn per cycle + # 0.00 stalled cycles per insn (73.57%) + 0.449523585 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.105467e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.308351e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.308351e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.065807 sec -INFO: No Floating Point Exceptions have been reported - 18,430,609,716 cycles # 3.036 GHz - 45,013,968,880 instructions # 2.44 insn per cycle - 6.072784911 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.644542e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934014e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 4.500088 sec +INFO: No Floating Point Exceptions have been reported + 15,117,036,320 cycles:u # 3.354 GHz (74.98%) + 38,695,670 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) + 701,327,415 stalled-cycles-backend:u # 4.64% backend cycles idle (74.98%) + 46,331,934,014 instructions:u # 3.06 insn per cycle + # 0.02 stalled cycles per insn (74.99%) + 4.511695894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039854866802E-002 -Relative difference = 1.1313746984080878e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.308005e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.525687e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.525687e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.099771 sec -INFO: No Floating Point Exceptions have been reported - 9,387,612,417 cycles # 3.022 GHz - 22,262,525,785 instructions # 2.37 insn per cycle - 3.106925476 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.184636e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.549935e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.549935e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.648827 sec +INFO: No Floating Point Exceptions have been reported + 8,627,833,311 cycles:u # 3.249 GHz (75.00%) + 38,138,945 stalled-cycles-frontend:u # 0.44% frontend cycles idle (75.00%) + 1,113,458,421 stalled-cycles-backend:u # 12.91% backend cycles idle (75.00%) + 22,343,086,276 instructions:u # 2.59 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 2.660848486 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1874) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.403111e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688485e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688485e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.998210 sec -INFO: No Floating Point Exceptions have been reported - 8,478,264,746 cycles # 2.822 GHz - 15,771,817,686 instructions # 1.86 insn per cycle - 3.005389330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2540) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.543143e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.175956e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.175956e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.461812 sec +INFO: No Floating Point Exceptions have been reported + 7,940,057,293 cycles:u # 3.215 GHz (74.96%) + 41,174,009 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.06%) + 1,882,846,184 stalled-cycles-backend:u # 23.71% backend cycles idle (75.06%) + 15,379,580,907 instructions:u # 1.94 insn per cycle + # 0.12 stalled cycles per insn (75.06%) + 2.475026898 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.519220e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.918776e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
3.918776e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.869953 sec -INFO: No Floating Point Exceptions have been reported - 8,393,268,013 cycles # 2.918 GHz - 15,616,623,130 instructions # 1.86 insn per cycle - 2.877528511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 10) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053255361738E-002 -Relative difference = 2.5376902468575066e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.552752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.947223e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.947223e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.838532 sec -INFO: No Floating Point Exceptions have been reported - 6,699,223,007 cycles # 2.355 GHz - 12,875,694,500 instructions # 1.92 insn per cycle - 2.846218721 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 16) (512z: 1427) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052564145764E-002 -Relative difference = 1.9988585667912256e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index c2fede3d2c..77ba118279 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:04:52 -DATE: 2024-10-02_22:51:22 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.237934e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.403884e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.415879e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.576926 sec -INFO: No Floating Point Exceptions have been reported - 2,374,711,860 cycles # 2.948 GHz - 3,718,677,413 instructions # 1.57 insn per cycle - 0.862944455 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process 
= SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.414008e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.126835e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302976e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.399417 sec +INFO: No Floating Point Exceptions have been reported + 1,001,400,746 cycles:u # 2.433 GHz (75.66%) + 2,389,902 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.96%) + 7,897,841 stalled-cycles-backend:u # 0.79% backend cycles idle (73.56%) + 2,155,425,468 instructions:u # 2.15 insn per cycle + # 0.00 stalled cycles per insn (73.95%) + 0.459641971 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.667468e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.170854e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.170854e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.146636 sec -INFO: No Floating Point Exceptions have been reported - 12,261,145,046 cycles # 2.953 GHz - 32,316,842,246 instructions # 2.64 insn per cycle - 4.153494127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.192494e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.739731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739731e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 3.544314 sec +INFO: No Floating Point Exceptions have been reported + 11,738,609,068 cycles:u # 3.305 GHz (75.00%) + 37,960,811 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.00%) + 1,904,176,289 stalled-cycles-backend:u # 16.22% backend cycles idle (75.00%) + 37,556,795,480 instructions:u # 3.20 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 3.556426727 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039840314887E-002 -Relative difference = 1.244813035273009e-08 +Avg ME (F77/C++) = 1.2828039543819614E-002 +Relative difference = 3.5561191488957804e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.725444e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.600281e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.600281e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.681360 sec -INFO: No Floating Point Exceptions have been reported - 8,088,187,177 cycles # 3.009 GHz - 18,710,529,150 instructions # 2.31 insn per cycle - 2.688484326 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.858282e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.030493e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.030493e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.308738 sec +INFO: No Floating Point Exceptions have been reported + 7,435,914,224 cycles:u # 3.211 GHz (74.83%) + 39,808,129 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.82%) + 222,247,801 stalled-cycles-backend:u # 2.99% backend cycles idle (74.97%) + 18,452,473,674 instructions:u # 2.48 insn per cycle + # 0.01 stalled cycles per insn (75.13%) + 2.320549620 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2784) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039283704129E-002 -Relative difference = 5.583829420356249e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.859277e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.808400e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.808400e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.569037 sec -INFO: No Floating Point Exceptions have been reported - 7,549,873,391 cycles # 2.932 GHz - 14,270,632,476 instructions # 1.89 insn per cycle - 2.576072623 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2234) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.889053e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.958140e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.958140e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.293183 sec +INFO: No Floating Point Exceptions have been reported + 7,366,115,678 cycles:u # 3.203 GHz (74.89%) + 43,337,125 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.96%) + 836,904,460 stalled-cycles-backend:u # 11.36% backend cycles idle (74.96%) + 14,165,019,880 instructions:u # 1.92 insn per cycle + # 0.06 stalled cycles per insn (74.99%) + 2.305245880 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4304) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 +Avg ME (F77/C++) = 1.2828053369958070E-002 +Relative difference = 2.627022867500074e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.912318e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.926913e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
4.926913e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.529094 sec -INFO: No Floating Point Exceptions have been reported - 7,434,475,397 cycles # 2.932 GHz - 13,977,545,253 instructions # 1.88 insn per cycle - 2.536141283 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053244447801E-002 -Relative difference = 2.5291823782248813e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.641405e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.120039e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.120039e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.753404 sec -INFO: No Floating Point Exceptions have been reported - 6,573,430,342 cycles # 2.382 GHz - 13,458,829,954 instructions # 2.05 insn per cycle - 2.760331688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1201) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052562326775E-002 -Relative difference = 1.997440588685788e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index 42dc2f68f3..b9eaa981bd 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_11:05:04 -DATE: 2024-10-02_22:51:45 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.186843e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.656263e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.696977e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.581467 sec -INFO: No Floating Point Exceptions have been reported - 2,378,200,312 cycles # 2.946 GHz - 3,636,272,588 instructions # 1.53 insn per cycle - 0.866537822 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=1, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.517534e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.633499e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.873519e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 +TOTAL : 0.394098 sec +INFO: No Floating Point Exceptions have been reported + 984,000,288 cycles:u # 2.417 GHz (75.54%) + 2,289,270 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.86%) + 6,710,527 stalled-cycles-backend:u # 0.68% backend cycles idle (74.60%) + 2,111,029,549 instructions:u # 2.15 insn per cycle + # 0.00 stalled cycles per insn (73.97%) + 0.450745849 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828112125134794E-002 -Relative difference = 7.1815552823662555e-06 +Avg ME (F77/GPU) = 1.2828036060454906E-002 +Relative difference = 1.251982371809749e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.269342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.321851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.321851e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.139062 sec -INFO: No Floating Point Exceptions have been reported - 9,447,844,635 cycles # 3.004 GHz - 25,728,895,866 instructions # 2.72 insn per cycle - 3.146180190 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.082552e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.290562e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.290562e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 +TOTAL : 2.716231 sec +INFO: No Floating Point Exceptions have been reported + 8,898,388,041 cycles:u # 3.267 GHz (74.93%) + 41,829,985 stalled-cycles-frontend:u # 0.47% frontend cycles idle (75.03%) + 29,489,710 stalled-cycles-backend:u # 0.33% backend cycles idle (75.03%) + 28,391,942,107 instructions:u # 3.19 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 2.728179465 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039838495897E-002 -Relative difference = 1.2589928273811243e-08 +Avg ME (F77/C++) = 1.2828039569285465E-002 +Relative difference = 3.357602059382168e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.082178e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.667437e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.667437e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.412915 sec -INFO: No Floating Point Exceptions have been reported - 7,357,724,099 cycles # 3.042 GHz - 16,792,911,111 instructions # 2.28 insn per cycle - 2.419999040 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.295051e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.197798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.197798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 +TOTAL : 2.144958 sec +INFO: No Floating Point Exceptions have been reported + 6,871,672,269 cycles:u # 3.193 GHz (74.78%) + 38,823,881 stalled-cycles-frontend:u # 0.56% frontend cycles idle (74.90%) + 30,579,912 stalled-cycles-backend:u # 0.45% backend cycles idle (75.08%) + 16,529,674,900 instructions:u # 2.41 insn per cycle + # 0.00 stalled cycles per insn (75.10%) + 2.157104605 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2423) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039280066150E-002 -Relative difference = 5.612189004572479e-08 +Avg ME (F77/C++) = 1.2828039385567536E-002 +Relative difference = 4.7897610623017996e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.009521e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.244937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.244937e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.458445 sec -INFO: No Floating Point Exceptions have been reported - 7,244,876,322 cycles # 2.940 GHz - 13,685,401,521 instructions # 1.89 insn per cycle - 2.465610624 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2067) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.100324e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.455573e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.455573e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 2.207336 sec +INFO: No Floating Point Exceptions have been reported + 7,073,444,737 cycles:u # 3.194 GHz (74.81%) + 42,370,010 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.88%) + 694,346,485 stalled-cycles-backend:u # 9.82% backend cycles idle (75.06%) + 13,519,186,690 instructions:u # 1.91 insn per cycle + # 0.05 stalled cycles per insn (75.08%) + 2.219078688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3983) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 +Avg ME (F77/C++) = 1.2828053349949187E-002 +Relative difference = 2.611425108340261e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.056703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.398349e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
5.398349e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.426097 sec -INFO: No Floating Point Exceptions have been reported - 7,152,685,127 cycles # 2.941 GHz - 13,478,713,055 instructions # 1.88 insn per cycle - 2.433340778 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1935) (512y: 7) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053220800939E-002 -Relative difference = 2.5107486628541925e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.725686e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.419420e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.419420e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.678328 sec -INFO: No Floating Point Exceptions have been reported - 6,471,041,764 cycles # 2.410 GHz - 13,198,051,679 instructions # 2.04 insn per cycle - 2.685585168 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 2) (512z: 1081) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828052536860923E-002 -Relative difference = 1.977588895209662e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 2060fbedbb..1f715ef8b5 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:24:04 -DATE: 2024-10-02_22:22:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.928121e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.676063e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.875343e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.669424 sec -INFO: No Floating Point Exceptions have been reported - 2,687,042,079 cycles # 2.965 GHz - 4,204,109,883 instructions # 1.56 insn per cycle - 0.965175843 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.206239e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.874491e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.991778e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.525215 sec +INFO: No Floating Point Exceptions have been reported + 1,408,443,106 cycles:u # 2.603 GHz (75.91%) + 2,359,037 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.93%) + 7,955,864 stalled-cycles-backend:u # 0.56% backend cycles idle (72.82%) + 2,289,766,618 instructions:u # 1.63 insn per cycle + # 0.00 stalled cycles per insn (74.01%) + 0.586272278 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590281E-002 +Relative difference = 7.67145406542181e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.052853e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.226798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.226798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.407166 sec -INFO: No Floating Point Exceptions have been reported - 19,535,555,015 cycles # 3.045 GHz - 46,362,239,692 instructions # 2.37 insn per cycle - 6.417789931 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.390880e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.598202e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.598202e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.256363 sec +INFO: No Floating Point Exceptions have been reported + 17,676,945,035 cycles:u # 3.356 GHz (74.95%) + 52,327,703 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.95%) + 122,603,341 stalled-cycles-backend:u # 0.69% backend cycles idle (74.95%) + 47,500,992,681 instructions:u # 2.69 insn per cycle + # 0.00 stalled cycles per insn (74.95%) + 5.271945686 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 454) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.666136e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.232533e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.232533e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.230802 sec -INFO: No Floating Point Exceptions have been reported - 12,890,679,042 cycles # 3.040 GHz - 31,578,108,652 instructions # 2.45 insn per cycle - 4.240949908 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.079037e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.611959e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.611959e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.755414 sec +INFO: No Floating Point Exceptions have been reported + 12,399,293,680 cycles:u # 3.293 GHz (74.95%) + 49,795,094 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.93%) + 1,134,444,548 stalled-cycles-backend:u # 9.15% backend cycles idle (74.95%) + 31,491,925,278 instructions:u # 2.54 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 3.770015251 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1704) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.010640e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821489e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.821489e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.563594 sec -INFO: No Floating Point Exceptions have been reported - 10,372,454,793 cycles # 2.902 GHz - 19,578,852,143 instructions # 1.89 insn per cycle - 3.574922628 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.765047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.709049e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.709049e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.007196 sec +INFO: No Floating Point Exceptions have been reported + 9,786,968,009 cycles:u # 3.243 GHz (74.87%) + 50,806,066 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.00%) + 270,252,174 stalled-cycles-backend:u # 2.76% backend cycles idle (75.08%) + 19,298,900,833 instructions:u # 1.97 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 3.021795385 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2054) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.069471e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.914096e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.914096e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.466182 sec -INFO: No Floating Point Exceptions have been reported - 10,155,286,917 cycles # 2.921 GHz - 19,386,130,150 instructions # 1.91 insn per cycle - 3.477475193 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1799) (512y: 188) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.858221e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.512069e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.512069e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.816838 sec -INFO: No Floating Point Exceptions have been reported - 8,594,167,517 cycles # 2.246 GHz - 15,203,120,195 instructions # 1.77 insn per cycle - 3.827835521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 966) (512y: 154) (512z: 1330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 48c59a6c19..2140351b90 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +DATE: 2024-10-04_10:24:20 -DATE: 2024-10-02_22:23:28 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.001883e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688202e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.868771e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.683910 sec -INFO: No Floating Point Exceptions have been reported - 2,716,417,669 cycles # 2.955 GHz - 4,171,561,022 instructions # 1.54 insn per cycle - 0.979523470 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 
1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.543426e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.535835e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.681413e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 +TOTAL : 0.518055 sec +INFO: No Floating Point Exceptions have been reported + 1,400,628,963 cycles:u # 2.626 GHz (74.64%) + 2,439,477 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.12%) + 10,838,954 stalled-cycles-backend:u # 0.77% backend cycles idle (74.41%) + 2,170,699,040 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (75.69%) + 0.579296770 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590279E-002 -Relative difference = 7.671454200650844e-09 +Avg ME (F77/GPU) = 1.2828039901590284E-002 +Relative difference = 7.67145379496374e-09 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.054705e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.228539e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.228539e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.386240 sec -INFO: No Floating Point Exceptions have been reported - 19,440,857,068 cycles # 3.040 GHz - 46,292,428,054 instructions # 2.38 insn per cycle - 6.396172423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.389376e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.597120e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.597120e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 5.261993 sec +INFO: No Floating Point Exceptions have been reported + 17,636,830,308 cycles:u # 3.345 GHz (74.97%) + 50,117,030 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.97%) + 539,327,437 stalled-cycles-backend:u # 3.06% backend cycles idle (74.98%) + 47,039,999,877 instructions:u # 2.67 insn per cycle + # 0.01 stalled cycles per insn (74.97%) + 5.276599745 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.676436e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.220798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.220798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.182593 sec -INFO: No Floating Point Exceptions have been reported - 12,700,648,520 cycles # 3.030 GHz - 31,544,456,287 instructions # 2.48 insn per cycle - 4.192353583 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.085003e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.622247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.622247e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.744645 sec +INFO: No Floating Point Exceptions have been reported + 12,398,692,512 cycles:u # 3.302 GHz (74.89%) + 50,378,852 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.89%) + 483,550,224 stalled-cycles-backend:u # 3.90% backend cycles idle (75.00%) + 31,116,176,638 instructions:u # 2.51 insn per cycle + # 0.02 stalled cycles per insn (75.07%) + 3.759135491 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1654) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.967779e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.746605e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.746605e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.623519 sec -INFO: No Floating Point Exceptions have been reported - 10,490,743,681 cycles # 2.889 GHz - 19,585,261,086 instructions # 1.87 insn per cycle - 3.632834496 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.789842e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.742527e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.742527e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 2.983691 sec +INFO: No Floating Point Exceptions have been reported + 9,708,487,937 cycles:u # 3.243 GHz (74.91%) + 51,457,671 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.91%) + 665,719,250 stalled-cycles-backend:u # 6.86% backend cycles idle (74.93%) + 19,217,448,091 instructions:u # 1.98 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 2.998648091 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2008) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.002208e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.806194e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.806194e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 
3.572021 sec -INFO: No Floating Point Exceptions have been reported - 10,103,456,274 cycles # 2.822 GHz - 19,279,378,017 instructions # 1.91 insn per cycle - 3.581949884 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1766) (512y: 191) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.930358e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.638228e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.638228e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.683083 sec -INFO: No Floating Point Exceptions have been reported - 8,384,754,211 cycles # 2.271 GHz - 15,047,526,015 instructions # 1.79 insn per cycle - 3.693325560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 959) (512y: 155) (512z: 1296) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039951670679E-002 -Relative difference = 3.767475112924841e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 7468338173..262973dfc9 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:24:35 -DATE: 2024-10-02_22:23:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.498098e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.405782e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004369e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.530626 sec -INFO: No Floating Point Exceptions have been reported - 2,255,350,138 cycles # 2.943 GHz - 3,167,522,189 instructions # 1.40 insn per cycle - 0.824213544 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.795706e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.246793e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.263960e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.417372 sec +INFO: No Floating Point Exceptions have been reported + 1,002,312,084 cycles:u # 2.389 GHz (75.58%) + 2,537,157 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.39%) + 5,476,658 stalled-cycles-backend:u # 0.55% backend cycles idle (74.87%) + 1,589,322,484 instructions:u # 1.59 insn per cycle + # 0.00 stalled cycles per insn (74.00%) + 0.479214572 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.886686e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.936500e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.936500e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.692636 sec -INFO: No Floating Point Exceptions have been reported - 17,368,647,605 cycles # 3.046 GHz - 46,027,534,067 instructions # 2.65 insn per cycle - 5.703786393 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.605688e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.669316e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.669316e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.202703 sec +INFO: No Floating Point Exceptions have been reported + 14,442,340,371 cycles:u # 3.427 GHz (74.96%) + 9,564,071 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.95%) + 3,687,995,069 stalled-cycles-backend:u # 25.54% backend cycles idle (74.96%) + 45,567,415,149 instructions:u # 3.16 insn per cycle + # 0.08 stalled cycles per insn (74.95%) + 4.218705673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.323966e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.493999e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.493999e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.291463 sec -INFO: No Floating Point Exceptions have been reported - 10,086,066,895 cycles # 3.055 GHz - 27,948,730,669 instructions # 2.77 insn per cycle - 3.302659152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.346809e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.542455e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.542455e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.591690 sec +INFO: No Floating Point Exceptions have been reported + 8,826,253,844 cycles:u # 3.391 GHz (74.74%) + 8,716,002 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.92%) + 2,662,862,677 stalled-cycles-backend:u # 30.17% backend cycles idle (75.07%) + 27,731,598,930 instructions:u # 3.14 insn per cycle + # 0.10 stalled cycles per insn (75.11%) + 2.608471057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.198504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.619384e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.619384e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.156330 sec -INFO: No Floating Point Exceptions have been reported - 6,234,386,062 cycles # 2.877 GHz - 12,684,453,152 instructions # 2.03 insn per cycle - 2.167952608 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.342765e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.872250e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.872250e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.606918 sec +INFO: No Floating Point Exceptions have been reported + 5,346,270,058 cycles:u # 3.304 GHz (74.83%) + 9,264,562 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) + 108,531,822 stalled-cycles-backend:u # 2.03% backend cycles idle (74.83%) + 12,360,834,728 instructions:u # 2.31 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 1.623226321 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.685017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.177140e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.177140e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.983940 sec -INFO: No Floating Point 
Exceptions have been reported - 5,724,695,862 cycles # 2.870 GHz - 12,129,787,940 instructions # 2.12 insn per cycle - 1.995450843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.687151e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.892823e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.892823e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.999450 sec -INFO: No Floating Point Exceptions have been reported - 5,896,077,322 cycles # 1.959 GHz - 8,395,996,491 instructions # 1.42 insn per cycle - 3.011053687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 5dd64826c7..518b9cf636 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:14:33 -DATE: 2024-10-02_23:01:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.684703e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.020852e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.020852e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.806676 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,134,464,403 cycles # 2.980 GHz - 4,838,192,243 instructions # 1.54 insn per cycle - 1.110475719 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.823557e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.808700e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.808700e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.236557 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,745,324,778 cycles:u # 2.962 GHz (74.96%) + 37,169,072 stalled-cycles-frontend:u # 0.99% frontend cycles idle (74.92%) + 1,118,909,477 stalled-cycles-backend:u # 29.87% backend cycles idle (75.01%) + 3,914,941,106 instructions:u # 1.05 insn per cycle + # 0.29 stalled cycles per insn (75.05%) + 1.307544711 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.866865e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914803e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914803e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.802666 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,655,301,343 cycles # 3.040 GHz - 46,001,555,857 instructions # 2.61 insn per cycle - 5.809509158 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.603139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.666619e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666619e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.287763 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 14,548,513,521 cycles:u # 3.375 GHz (74.95%) + 8,379,260 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.95%) + 3,683,639,771 stalled-cycles-backend:u # 25.32% backend cycles idle (74.95%) + 45,666,488,751 instructions:u # 3.14 insn per cycle + # 0.08 stalled cycles per insn (75.01%) + 4.314628971 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.287541e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.450328e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.450328e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.381050 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,321,096,155 cycles # 3.046 GHz - 28,032,087,820 instructions # 2.72 insn per cycle - 3.388593541 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.316630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.510001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.510001e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.694596 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,944,223,279 cycles:u # 3.292 GHz (74.99%) + 8,617,600 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) + 2,703,947,139 stalled-cycles-backend:u # 30.23% backend cycles idle (74.98%) + 27,960,252,014 instructions:u # 3.13 insn per cycle + # 0.10 stalled cycles per insn (74.98%) + 2.721625196 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.088715e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.474660e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.474660e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.249251 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,455,426,136 cycles # 2.862 GHz - 12,868,987,997 instructions # 1.99 insn per cycle - 2.256773746 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.263697e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.782608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.782608e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.711012 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,503,749,219 cycles:u # 3.175 GHz (74.90%) + 9,739,850 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.08%) + 127,747,937 stalled-cycles-backend:u # 2.32% backend cycles idle (75.12%) + 12,548,320,264 instructions:u # 2.28 insn per cycle + # 0.01 stalled cycles per insn (75.12%) + 1.738305886 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.518930e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.971845e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.971845e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.089965 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,014,910,839 cycles # 2.869 GHz - 12,312,588,648 instructions # 2.05 insn per cycle - 2.097490367 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.612291e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.802715e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.802715e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.094965 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,077,458,214 cycles # 1.960 GHz - 8,540,885,730 instructions # 1.41 insn per cycle - 3.102450264 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index fb067a4517..5ebe35f44d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:19:57 -DATE: 2024-10-02_23:12:54 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.237979e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.266698e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.961441e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.766101e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.257804e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.275208e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.625341 sec -INFO: No Floating Point Exceptions have been reported - 2,549,638,677 cycles # 2.971 GHz - 3,713,912,250 instructions # 1.46 insn per cycle - 0.915676485 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 1.082268 sec +INFO: No Floating Point Exceptions have been reported + 3,316,553,469 cycles:u # 3.004 GHz (74.94%) + 27,319,938 stalled-cycles-frontend:u # 0.82% frontend cycles idle (75.45%) + 1,100,706,908 stalled-cycles-backend:u # 33.19% backend cycles idle (75.01%) + 3,007,525,955 instructions:u # 0.91 insn per cycle + # 0.37 stalled cycles per insn (75.13%) + 1.143449305 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.890077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.940474e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.940474e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.551130e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.612314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.612314e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.720362 sec -INFO: No Floating Point Exceptions have been reported - 17,428,970,068 cycles # 3.044 GHz - 45,948,811,639 instructions # 2.64 insn per cycle - 5.726910837 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.291889 sec +INFO: No Floating Point Exceptions have been reported + 14,756,742,045 cycles:u # 3.429 GHz (74.92%) + 9,912,834 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) + 3,604,260,866 stalled-cycles-backend:u # 24.42% backend cycles idle (74.93%) + 45,550,999,396 instructions:u # 3.09 insn per cycle + # 0.08 stalled cycles per insn (75.04%) + 4.305522360 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
3.312122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.481190e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.481190e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.339443e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.532754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.532754e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.331505 sec -INFO: No Floating Point Exceptions have been reported - 10,154,233,518 cycles # 3.043 GHz - 27,846,201,009 instructions # 2.74 insn per cycle - 3.337417969 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 2.597570 sec +INFO: No Floating Point Exceptions have been reported + 8,833,336,540 cycles:u # 3.386 GHz (74.72%) + 8,771,257 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.91%) + 2,674,666,106 stalled-cycles-backend:u # 30.28% backend cycles idle (75.07%) + 27,707,773,853 instructions:u # 3.14 insn per cycle + # 0.10 stalled cycles per insn (75.16%) + 2.611099203 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
5.219886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.630778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.630778e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.344690e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.876141e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.876141e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.174947 sec -INFO: No Floating Point Exceptions have been reported - 6,305,944,181 cycles # 2.892 GHz - 12,563,017,456 instructions # 1.99 insn per cycle - 2.180991635 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +TOTAL : 1.607135 sec +INFO: No Floating Point Exceptions have been reported + 5,353,107,590 cycles:u # 3.307 GHz (74.60%) + 9,513,922 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.59%) + 107,449,573 stalled-cycles-backend:u # 2.01% backend cycles idle (74.86%) + 12,332,779,751 instructions:u # 2.30 insn per cycle + # 0.01 stalled cycles per insn (75.11%) + 1.620726713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.718682e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.205781e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.205781e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.000335 sec -INFO: No 
Floating Point Exceptions have been reported - 5,780,250,424 cycles # 2.882 GHz - 11,971,200,140 instructions # 2.07 insn per cycle - 2.006264960 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.757157e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.962049e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.962049e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.956645 sec -INFO: No Floating Point Exceptions have been reported - 5,909,728,884 cycles # 1.996 GHz - 8,241,949,857 instructions # 1.39 insn per cycle - 2.962494747 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index cfdfd81d8b..40155e52c1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:18:11 -DATE: 2024-10-02_23:07:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.943490e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.339371e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.984539e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.708663 sec -INFO: No Floating Point Exceptions have been reported - 2,814,351,890 cycles # 2.973 GHz - 4,386,424,355 instructions # 1.56 insn per cycle - 1.004249462 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.508342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.243101e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.260292e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.182059 sec +INFO: No Floating Point Exceptions have been reported + 3,627,604,642 cycles:u # 3.004 GHz (75.55%) + 36,593,388 stalled-cycles-frontend:u # 1.01% frontend cycles idle (75.05%) + 1,113,204,395 stalled-cycles-backend:u # 30.69% backend cycles idle (74.26%) + 3,905,912,620 instructions:u # 1.08 insn per cycle + # 0.29 stalled cycles per insn (74.27%) + 1.239357966 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.883485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.932448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.932448e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.672690 sec -INFO: No Floating Point Exceptions have been reported - 17,267,443,034 cycles # 3.041 GHz - 45,934,071,651 instructions # 2.66 insn per cycle - 5.678248544 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.603155e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.666704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.666704e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.208931 sec +INFO: No Floating Point Exceptions have been reported + 14,449,569,654 cycles:u # 3.424 GHz (74.98%) + 9,335,274 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.98%) + 3,641,385,463 stalled-cycles-backend:u # 25.20% backend cycles idle (74.98%) + 45,573,624,021 instructions:u # 3.15 insn per cycle + # 0.08 stalled cycles per insn (75.00%) + 4.222621372 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.312433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.476769e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.476769e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.271929 sec -INFO: No Floating Point Exceptions have been reported - 9,963,025,400 cycles # 3.040 GHz - 27,846,624,194 instructions # 2.79 insn per cycle - 3.277897304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.337190e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.531828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.531828e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.598412 sec +INFO: No Floating Point Exceptions have been reported + 8,806,856,659 cycles:u # 3.374 GHz (74.89%) + 9,071,023 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.88%) + 2,680,361,322 stalled-cycles-backend:u # 30.43% backend cycles idle (74.91%) + 27,742,238,202 instructions:u # 3.15 insn per cycle + # 0.10 stalled cycles per insn (75.06%) + 2.611954894 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.239087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.651240e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.651240e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.106521 sec -INFO: No Floating Point Exceptions have been reported - 6,082,880,254 cycles # 2.881 GHz - 12,580,112,604 instructions # 2.07 insn per cycle - 2.112469814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.332743e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.867606e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.867606e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.610457 sec +INFO: No Floating Point Exceptions have been reported + 5,332,808,909 cycles:u # 3.288 GHz (74.85%) + 9,552,700 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.89%) + 125,813,916 stalled-cycles-backend:u # 2.36% backend cycles idle (74.89%) + 12,389,288,629 instructions:u # 2.32 insn per cycle + # 0.01 stalled cycles per insn (74.86%) + 1.624079398 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.713560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.205418e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.205418e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.942615 sec -INFO: No 
Floating Point Exceptions have been reported - 5,598,784,098 cycles # 2.875 GHz - 12,021,854,440 instructions # 2.15 insn per cycle - 1.948464491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.721108e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921919e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.921919e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.924395 sec -INFO: No Floating Point Exceptions have been reported - 5,709,016,650 cycles # 1.949 GHz - 8,292,946,160 instructions # 1.45 insn per cycle - 2.930717532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index e452755d81..1139a514e8 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:24:48 -DATE: 2024-10-02_22:24:23 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.448581e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.354023e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002210e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.528267 sec -INFO: No Floating Point Exceptions have been reported - 2,275,766,454 cycles # 2.946 GHz - 3,236,087,959 instructions # 1.42 insn per cycle - 0.829364074 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.868420e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.360333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.379306e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.396279 sec +INFO: No Floating Point Exceptions have been reported + 1,007,854,239 cycles:u # 2.437 GHz (75.90%) + 2,351,504 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.22%) + 11,822,469 stalled-cycles-backend:u # 1.17% backend cycles idle (74.03%) + 1,547,822,021 instructions:u # 1.54 insn per cycle + # 0.01 stalled cycles per insn (74.48%) + 0.451945393 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.936081e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.988461e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.988461e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.552727 sec -INFO: No Floating Point Exceptions have been reported - 16,901,199,171 cycles # 3.038 GHz - 45,022,482,452 instructions # 2.66 insn per cycle - 5.563984445 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.662256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.729946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.729946e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.117486 sec +INFO: No Floating Point Exceptions have been reported + 14,122,801,366 cycles:u # 3.420 GHz (75.01%) + 8,761,903 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) + 286,825,352 stalled-cycles-backend:u # 2.03% backend cycles idle (75.01%) + 44,420,019,295 instructions:u # 3.15 insn per cycle + # 0.01 stalled cycles per insn (75.01%) + 4.133446933 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.485422e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.673978e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.673978e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.149592 sec -INFO: No Floating Point Exceptions have been reported - 9,645,674,288 cycles # 3.052 GHz - 26,795,751,605 instructions # 2.78 insn per cycle - 3.161004757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.599556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.816704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.816704e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.458740 sec +INFO: No Floating Point Exceptions have been reported + 8,332,546,922 cycles:u # 3.374 GHz (74.95%) + 9,140,076 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.06%) + 623,618,114 stalled-cycles-backend:u # 7.48% backend cycles idle (75.06%) + 26,731,412,858 instructions:u # 3.21 insn per cycle + # 0.02 stalled cycles per insn (75.06%) + 2.474800682 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2266) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.736441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.083709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.083709e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.353548 sec -INFO: No Floating Point Exceptions have been reported - 6,761,037,249 cycles # 2.860 GHz - 14,228,059,801 instructions # 2.10 insn per cycle - 2.365157520 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.604213e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.030498e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.030498e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.766196 sec +INFO: No Floating Point Exceptions have been reported + 5,918,186,168 cycles:u # 3.330 GHz (74.65%) + 9,909,098 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.61%) + 1,417,938,134 stalled-cycles-backend:u # 23.96% backend cycles idle (74.91%) + 14,155,302,337 instructions:u # 2.39 insn per cycle + # 0.10 stalled cycles per insn (75.14%) + 1.781864242 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.968829e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.344780e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.344780e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.247383 sec -INFO: No Floating Point 
Exceptions have been reported - 6,510,703,452 cycles # 2.883 GHz - 13,816,231,944 instructions # 2.12 insn per cycle - 2.258945119 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 298) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.569827e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.756116e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.756116e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.073181 sec -INFO: No Floating Point Exceptions have been reported - 6,036,497,255 cycles # 1.958 GHz - 10,155,247,558 instructions # 1.68 insn per cycle - 3.084089287 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index 3f301e0024..d076826ea5 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:05:15 -DATE: 2024-10-02_22:52:06 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.340998e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.340259e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.003199e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.527026 sec -INFO: No Floating Point Exceptions have been reported - 2,260,619,407 cycles # 2.959 GHz - 3,198,102,043 instructions # 1.41 insn per cycle - 0.820578908 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.783361e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.238053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.255152e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.409960 sec +INFO: No Floating Point Exceptions have been reported + 983,336,857 cycles:u # 2.303 GHz (76.14%) + 2,469,457 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.18%) + 4,976,926 stalled-cycles-backend:u # 0.51% backend cycles idle (74.79%) + 1,616,266,414 instructions:u # 1.64 insn per cycle + # 0.00 stalled cycles per insn (74.74%) + 0.471255682 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.506708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.593742e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.593742e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.317728 sec -INFO: No Floating Point Exceptions have been reported - 13,126,642,398 cycles # 3.033 GHz - 34,433,015,624 instructions # 2.62 insn per cycle - 4.328677433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.013156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.100079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.100079e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.658896 sec +INFO: No Floating Point Exceptions have been reported + 12,513,156,258 cycles:u # 3.408 GHz (74.95%) + 9,245,117 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.96%) + 4,143,339,835 stalled-cycles-backend:u # 33.11% backend cycles idle (74.95%) + 35,233,343,785 instructions:u # 2.82 insn per cycle + # 0.12 stalled cycles per insn (74.97%) + 3.675945427 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 885) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.048635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.191144e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.191144e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.577251 sec -INFO: No Floating Point Exceptions have been reported - 10,804,930,606 cycles # 3.011 GHz - 24,342,813,964 instructions # 2.25 insn per cycle - 3.588852357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.636349e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.855133e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.855133e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.442307 sec +INFO: No Floating Point Exceptions have been reported + 8,249,817,051 cycles:u # 3.361 GHz (74.93%) + 9,147,408 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) + 1,535,876,861 stalled-cycles-backend:u # 18.62% backend cycles idle (74.91%) + 21,739,807,224 instructions:u # 2.64 insn per cycle + # 0.07 stalled cycles per insn (74.98%) + 2.458718626 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.768382e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.111158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.111158e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.336794 sec -INFO: No Floating Point Exceptions have been reported - 6,749,191,802 cycles # 2.875 GHz - 12,499,645,150 instructions # 1.85 insn per cycle - 2.348240674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.777304e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.226878e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.226878e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.727248 sec +INFO: No Floating Point Exceptions have been reported + 5,769,321,383 cycles:u # 3.317 GHz (74.76%) + 9,128,614 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.96%) + 1,712,066,843 stalled-cycles-backend:u # 29.68% backend cycles idle (75.17%) + 11,985,793,290 instructions:u # 2.08 insn per cycle + # 0.14 stalled cycles per insn (74.96%) + 1.744117750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3012) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.125412e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.517975e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.517975e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.179421 sec -INFO: No Floating Point 
Exceptions have been reported - 6,250,432,884 cycles # 2.855 GHz - 11,637,371,150 instructions # 1.86 insn per cycle - 2.190039392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2644) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.990556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.222673e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.222673e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.765356 sec -INFO: No Floating Point Exceptions have been reported - 5,500,150,684 cycles # 1.982 GHz - 9,392,876,056 instructions # 1.71 insn per cycle - 2.776424500 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2099) (512y: 282) (512z: 1958) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index be2a10e541..fa4a6a7e86 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:05:26 -DATE: 2024-10-02_22:52:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.338457e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.391663e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.003521e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525273 sec -INFO: No Floating Point Exceptions have been reported - 2,295,553,727 cycles # 2.964 GHz - 3,280,425,227 instructions # 1.43 insn per cycle - 0.830798805 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.851942e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = 
( 2.349824e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.368613e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.400885 sec +INFO: No Floating Point Exceptions have been reported + 1,014,543,247 cycles:u # 2.418 GHz (75.45%) + 2,334,024 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.92%) + 5,645,449 stalled-cycles-backend:u # 0.56% backend cycles idle (76.86%) + 1,545,347,292 instructions:u # 1.52 insn per cycle + # 0.00 stalled cycles per insn (73.89%) + 0.463602690 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516822 -Relative difference = 3.2588034143755247e-07 +Avg ME (F77/GPU) = 2.0288063388516817 +Relative difference = 3.258803416564443e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.661937e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.759812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.759812e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.074785 sec -INFO: No Floating Point Exceptions have been reported - 12,438,640,427 cycles # 3.045 GHz - 35,010,031,379 instructions # 2.81 insn per cycle - 4.085812214 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.568775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.690176e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.690176e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 3.120636 sec +INFO: No Floating Point Exceptions have been reported + 10,616,092,847 cycles:u # 3.388 GHz (74.98%) + 9,061,560 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.98%) + 143,862,654 stalled-cycles-backend:u # 1.36% backend cycles idle (75.00%) + 34,765,673,828 instructions:u # 3.27 insn per cycle + # 0.00 stalled cycles per insn (74.99%) + 3.137819570 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.097398e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.243177e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.243177e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.521928 sec -INFO: No Floating Point Exceptions have been reported - 10,753,008,888 cycles # 3.045 GHz - 23,438,472,557 instructions # 2.18 insn per cycle - 3.532739913 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.034823e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.297045e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.297045e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.262756 sec +INFO: No Floating Point Exceptions have been reported + 7,641,580,700 cycles:u # 3.359 GHz (74.96%) + 9,121,529 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.04%) + 1,914,691,093 stalled-cycles-backend:u # 25.06% backend cycles idle (75.03%) + 21,062,439,124 instructions:u # 2.76 insn per cycle + # 0.09 stalled cycles per insn (75.03%) + 2.279387532 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.175589e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.585353e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.585353e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.163821 sec -INFO: No Floating Point Exceptions have been reported - 6,187,478,021 cycles # 2.846 GHz - 11,963,155,641 instructions # 1.93 insn per cycle - 2.174767157 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2468) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.381598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.919001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.919001e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.601135 sec +INFO: No Floating Point Exceptions have been reported + 5,329,144,968 cycles:u # 3.303 GHz (74.80%) + 9,061,646 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.72%) + 1,024,318,548 stalled-cycles-backend:u # 19.22% backend cycles idle (74.86%) + 11,328,230,141 instructions:u # 2.13 insn per cycle + # 0.09 stalled cycles per insn (75.11%) + 1.617993241 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2332) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.198229e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.610952e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.610952e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.154188 sec -INFO: No Floating Point 
Exceptions have been reported - 6,208,478,460 cycles # 2.868 GHz - 11,196,014,039 instructions # 1.80 insn per cycle - 2.165281437 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2098) (512y: 174) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.145182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.398127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.398127e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.669310 sec -INFO: No Floating Point Exceptions have been reported - 5,332,222,689 cycles # 1.990 GHz - 9,116,285,421 instructions # 1.71 insn per cycle - 2.680750400 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1567) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063388516204 -Relative difference = 3.2588037186351226e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 62e8332824..ee04ec4f60 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:25:00 -DATE: 2024-10-02_22:24:48 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.165719e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.725538e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.839606e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.490916 sec -INFO: No Floating Point Exceptions have been reported - 2,110,795,508 cycles # 2.938 GHz - 3,030,625,876 instructions # 1.44 insn per cycle - 0.775391712 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.848450e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.165587e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.189401e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.336046 sec +INFO: No Floating Point Exceptions have been reported + 791,799,591 cycles:u # 2.270 GHz (74.93%) + 2,269,676 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.87%) + 6,629,454 stalled-cycles-backend:u # 0.84% backend cycles idle (75.04%) + 1,529,378,535 instructions:u # 1.93 insn per cycle + # 0.00 stalled cycles per insn (73.60%) + 0.391082759 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.990027e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.047358e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.047358e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.362790 sec -INFO: No Floating Point Exceptions have been reported - 16,310,909,453 cycles # 3.038 GHz - 45,362,091,727 instructions # 2.78 insn per cycle - 5.370503759 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.988657e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.072972e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.072972e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.652105 sec +INFO: No Floating Point Exceptions have been reported + 12,612,098,802 cycles:u # 3.446 GHz (74.90%) + 7,268,244 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.89%) + 15,208,438 stalled-cycles-backend:u # 0.12% backend cycles idle (74.98%) + 45,478,259,156 instructions:u # 3.61 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 3.664109055 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.603236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.957062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.957062e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.377677 sec -INFO: No Floating Point Exceptions have been reported - 7,152,928,948 cycles # 2.999 GHz - 17,830,970,577 instructions # 2.49 insn per cycle - 2.385771116 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.304242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.700465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.700465e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.808110 sec +INFO: No Floating Point Exceptions have been reported + 6,146,797,671 cycles:u # 3.386 GHz (74.93%) + 6,779,824 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.89%) + 2,584,489,706 stalled-cycles-backend:u # 42.05% backend cycles idle (74.89%) + 17,099,643,260 instructions:u # 2.78 insn per cycle + # 0.15 stalled cycles per insn (74.92%) + 1.819933619 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.574095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.769268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.769268e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.318456 sec -INFO: No Floating Point Exceptions have been reported - 3,796,804,907 cycles # 2.864 GHz - 8,300,184,284 instructions # 2.19 insn per cycle - 1.326383790 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.200088e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.344397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.344397e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.018174 sec +INFO: No Floating Point Exceptions have been reported + 3,368,462,675 cycles:u # 3.284 GHz (75.11%) + 6,657,313 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.05%) + 1,076,136,635 stalled-cycles-backend:u # 31.95% backend cycles idle (75.04%) + 8,075,374,342 instructions:u # 2.40 insn per cycle + # 0.13 stalled cycles per insn (75.04%) + 1.029920053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.092654e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045479e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.045479e+06 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.251317 sec -INFO: No Floating Point Exceptions have been reported - 3,616,269,256 cycles # 2.873 GHz - 7,955,766,878 instructions # 2.20 insn per cycle - 1.259613074 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.839534e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.547643e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.547643e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.631498 sec -INFO: No Floating Point Exceptions have been reported - 3,329,875,936 cycles # 2.032 GHz - 6,139,934,168 instructions # 1.84 insn per cycle - 1.639821352 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 630c641b74..4fb6afacf1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,54 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:14:46 -DATE: 2024-10-02_23:01:26 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.033781e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.271776e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.271776e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.678665 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,680,382,600 cycles # 2.941 GHz - 4,125,886,335 instructions # 1.54 insn per cycle - 0.969131900 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.902290e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.846454e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.846454e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.154218 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,556,835,307 cycles:u # 3.029 GHz (75.15%) + 20,990,140 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.78%) + 1,118,879,516 stalled-cycles-backend:u # 31.46% backend cycles idle (74.86%) + 3,787,419,515 instructions:u # 1.06 insn per cycle + # 0.30 stalled cycles per insn (74.59%) + 1.212173867 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +56,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.992729e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.049211e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.049211e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.392675 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 16,447,641,920 cycles # 3.047 GHz - 45,376,165,291 instructions # 2.76 insn per cycle - 5.399694143 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.988122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.072513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.072513e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.693823 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 12,638,685,291 cycles:u # 3.409 GHz (74.97%) + 7,497,113 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) + 45,528,421 stalled-cycles-backend:u # 0.36% backend cycles idle (74.99%) + 45,589,213,942 instructions:u # 3.61 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 3.711404559 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +93,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.622643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.967470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.967470e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.403008 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,299,949,064 cycles # 3.030 GHz - 18,072,622,777 instructions # 2.48 insn per cycle - 2.410009326 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.065974e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.433122e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.433122e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.917484 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,448,677,292 cycles:u # 3.340 GHz (74.76%) + 6,354,577 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.80%) + 2,804,322,349 stalled-cycles-backend:u # 43.49% backend cycles idle (75.00%) + 17,249,385,401 instructions:u # 2.67 insn per cycle + # 0.16 stalled cycles per insn (75.14%) + 1.934734505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +130,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.349642e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.466667e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.466667e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.394511 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,022,324,849 cycles # 2.873 GHz - 8,505,914,761 instructions # 2.11 insn per cycle - 1.400755806 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.188263e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.329763e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.329763e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.073007 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,453,359,880 cycles:u # 3.179 GHz (75.02%) + 7,203,128 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.97%) + 1,092,295,238 stalled-cycles-backend:u # 31.63% backend cycles idle (74.96%) + 8,275,782,953 instructions:u # 2.40 insn per cycle + # 0.13 stalled cycles per insn (75.02%) + 1.091265410 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,80 +167,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.999206e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.031817e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.031817e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.296911 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,769,931,058 cycles # 2.893 GHz - 8,150,658,922 instructions # 2.16 insn per cycle - 1.303972646 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.810871e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.499560e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.499560e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.673742 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,483,753,004 cycles # 2.073 GHz - 6,352,116,456 instructions # 1.82 insn per cycle - 1.680900164 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 6618ce9254..762f16450e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:20:10 -DATE: 2024-10-02_23:13:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.987374e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.707237e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.828345e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.574914 sec -INFO: No Floating Point Exceptions have been reported - 2,354,975,975 cycles # 2.955 GHz - 3,428,501,052 instructions # 1.46 insn per cycle - 0.856281449 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.588714e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.159655e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.183290e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.080340e+00 +- 3.470037e-03 ) GeV^0 +TOTAL : 1.012466 sec +INFO: No Floating Point Exceptions have been reported + 3,144,227,554 cycles:u # 3.052 GHz (74.39%) + 10,791,235 stalled-cycles-frontend:u # 0.34% frontend cycles idle (74.44%) + 1,121,436,459 stalled-cycles-backend:u # 35.67% backend cycles idle (74.65%) + 2,941,132,864 instructions:u # 0.94 insn per cycle + # 0.38 stalled cycles per insn (74.92%) + 1.068167444 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.994861e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.050592e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.050592e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.976991e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060583e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060583e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.393986 sec -INFO: No Floating Point Exceptions have been reported - 16,418,504,516 cycles # 3.041 GHz - 45,362,649,560 instructions # 2.76 insn per cycle - 5.399598972 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.667094 sec +INFO: No Floating Point Exceptions have been reported + 12,641,839,385 cycles:u # 3.441 GHz (74.96%) + 7,496,531 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) + 33,050,827 stalled-cycles-backend:u # 0.26% backend cycles idle (74.96%) + 45,564,942,632 instructions:u # 3.60 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 3.676289637 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.530039e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.859076e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.859076e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.456930 sec -INFO: No Floating Point Exceptions have been reported - 7,301,275,560 cycles # 2.966 GHz - 17,806,613,996 instructions # 2.44 insn per cycle - 2.462297497 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.292930e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.687941e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.687941e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.811019 sec +INFO: No Floating Point Exceptions have been reported + 6,152,822,392 cycles:u # 3.384 GHz (74.93%) + 6,891,410 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.92%) + 2,580,267,707 stalled-cycles-backend:u # 41.94% backend cycles idle (74.92%) + 17,083,592,107 instructions:u # 2.78 insn per cycle + # 0.15 stalled cycles per insn (74.94%) + 1.820069994 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.656659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.868466e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.868466e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.350339 sec -INFO: No Floating Point Exceptions have been reported - 3,915,528,494 cycles # 2.889 GHz - 8,245,555,563 instructions # 2.11 insn per cycle - 1.356032687 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.200840e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.345000e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.345000e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.018437 sec +INFO: No Floating Point Exceptions have been reported + 3,355,093,185 cycles:u # 3.272 GHz (75.04%) + 6,885,352 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.04%) + 1,079,646,151 stalled-cycles-backend:u # 32.18% backend cycles idle (75.04%) + 8,103,194,689 instructions:u # 2.42 insn per cycle + # 0.13 stalled cycles per insn (75.04%) + 1.027456598 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.182418e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.053986e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.053986e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.281920 sec -INFO: No Floating Point Exceptions have been reported - 3,731,783,402 cycles # 2.900 GHz - 7,862,528,502 instructions # 2.11 insn per cycle - 1.287315829 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.860238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.561872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.561872e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.668691 sec -INFO: No Floating Point Exceptions have been reported - 3,447,157,076 cycles # 2.060 GHz - 6,046,313,937 instructions # 1.75 insn per cycle - 1.674405054 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index d009382057..d38f0dd075 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,70 +1,50 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:18:24 -DATE: 2024-10-02_23:07:49 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.732740e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.726714e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.848355e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.621200 sec -INFO: No Floating Point Exceptions have been reported - 2,502,023,855 cycles # 2.967 GHz - 3,885,363,287 instructions # 1.55 insn per cycle - 0.901561261 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.694581e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.155961e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.179501e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 +TOTAL : 1.124710 sec +INFO: No Floating Point Exceptions have been reported + 3,529,678,849 cycles:u # 3.075 GHz (74.28%) + 20,680,935 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.37%) + 1,112,277,947 stalled-cycles-backend:u # 31.51% backend cycles idle (74.44%) + 3,734,266,536 instructions:u # 1.06 insn per cycle + # 0.30 stalled cycles per insn (75.36%) + 1.179583202 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +52,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.981553e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.037751e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.037751e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.376232 sec -INFO: No Floating Point Exceptions have been reported - 16,248,042,022 cycles # 3.020 GHz - 45,331,416,361 instructions # 2.79 insn per cycle - 5.381836614 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.976978e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.060428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.060428e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.668167 sec +INFO: No Floating Point Exceptions have been reported + 12,646,077,468 cycles:u # 3.441 GHz (74.97%) + 7,141,243 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) + 15,653,788 stalled-cycles-backend:u # 0.12% backend cycles idle (74.97%) + 45,478,593,220 instructions:u # 3.60 insn per cycle + # 0.00 stalled cycles per insn (74.97%) + 3.677293409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +87,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.659533e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.006067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.006067e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.336141 sec -INFO: No Floating Point Exceptions have been reported - 7,090,666,725 cycles # 3.029 GHz - 17,790,450,090 instructions # 2.51 insn per cycle - 2.341746280 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.083713e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.528278e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.528278e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.882152 sec +INFO: No Floating Point Exceptions have been reported + 6,365,355,019 cycles:u # 3.369 GHz (75.02%) + 6,125,831 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) + 2,586,519,085 stalled-cycles-backend:u # 40.63% backend cycles idle (75.02%) + 17,067,881,993 instructions:u # 2.68 insn per cycle + # 0.15 stalled cycles per insn (75.02%) + 1.891163022 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +122,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.679787e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.897823e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.897823e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.291813 sec -INFO: No Floating Point Exceptions have been reported - 3,744,555,670 cycles # 2.888 GHz - 8,261,514,353 instructions # 2.21 insn per cycle - 1.297385166 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.199327e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.343590e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.343590e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.019173 sec +INFO: No Floating Point Exceptions have been reported + 3,378,128,655 cycles:u # 3.292 GHz (74.87%) + 6,791,969 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.06%) + 1,077,009,683 stalled-cycles-backend:u # 31.88% backend cycles idle (75.06%) + 8,071,602,588 instructions:u # 2.39 insn per cycle + # 0.13 stalled cycles per insn (75.06%) + 1.028260910 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,76 +157,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.138641e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.050679e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.050679e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.232224 sec -INFO: No Floating Point Exceptions have been reported - 3,566,706,619 cycles # 2.883 GHz - 7,912,197,395 instructions # 2.22 insn per cycle - 1.237921630 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.776715e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.464027e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.464027e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.632182 sec -INFO: No Floating Point Exceptions have been reported - 3,300,564,042 cycles # 2.017 GHz - 6,098,644,443 instructions # 1.85 insn per cycle - 1.637359770 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 114cd37caa..5f0c64fea0 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:25:10 -DATE: 2024-10-02_22:25:08 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.148449e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.747307e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.868608e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487780 sec -INFO: No Floating Point Exceptions have been reported - 2,112,765,884 cycles # 2.953 GHz - 3,008,781,494 instructions # 1.42 insn per cycle - 0.773144472 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.789516e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.145914e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.169020e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.333065 sec +INFO: No Floating Point Exceptions have been reported + 807,156,755 cycles:u # 2.331 GHz (76.13%) + 2,357,253 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.88%) + 6,916,176 stalled-cycles-backend:u # 0.86% backend cycles idle (74.29%) + 1,515,346,659 instructions:u # 1.88 insn per cycle + # 0.00 stalled cycles per insn (75.20%) + 0.388164521 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.032943e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.092094e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.092094e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.252513 sec -INFO: No Floating Point Exceptions have been reported - 15,985,799,367 cycles # 3.040 GHz - 44,469,540,251 instructions # 2.78 insn per cycle - 5.260076645 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.995767e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.084272e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.084272e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 3.647183 sec +INFO: No Floating Point Exceptions have been reported + 12,561,245,397 cycles:u # 3.437 GHz (74.90%) + 7,105,600 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) + 1,877,455,833 stalled-cycles-backend:u # 14.95% backend cycles idle (75.05%) + 44,204,929,073 instructions:u # 3.52 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 3.659050401 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198669441044 -Relative difference = 6.558289825352968e-08 +Avg ME (F77/C++) = 2.0288198337657377 +Relative difference = 8.193642726087208e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.499648e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.992066e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.992066e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.003668 sec -INFO: No Floating Point Exceptions have been reported - 6,125,955,843 cycles # 3.046 GHz - 17,118,502,582 instructions # 2.79 insn per cycle - 2.011813253 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.526162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.102574e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.102574e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.536647 sec +INFO: No Floating Point Exceptions have been reported + 5,204,363,119 cycles:u # 3.371 GHz (74.69%) + 6,659,030 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.88%) + 1,476,441,909 stalled-cycles-backend:u # 28.37% backend cycles idle (75.13%) + 16,884,742,552 instructions:u # 3.24 insn per cycle + # 0.09 stalled cycles per insn (75.13%) + 1.548405867 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2753) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193075684831 -Relative difference = 1.515997647531052e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198775378987 +Relative difference = 6.036124513188701e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.167880e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.760431e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.760431e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.797931 sec -INFO: No Floating Point Exceptions have been reported - 5,167,508,425 cycles # 2.864 GHz - 10,273,109,370 instructions # 1.99 insn per cycle - 1.805362641 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.932780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.706486e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.706486e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.317001 sec +INFO: No Floating Point Exceptions have been reported + 4,431,535,580 cycles:u # 3.346 GHz (74.72%) + 7,766,562 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.98%) + 1,716,277,430 stalled-cycles-backend:u # 38.73% backend cycles idle (75.24%) + 10,221,463,894 instructions:u # 2.31 insn per cycle + # 0.17 stalled cycles per insn (75.24%) + 1.328848484 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3885) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186282850802 +Relative difference = 1.8321738890139266e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.132241e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.737534e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.737534e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.807508 sec -INFO: No Floating Point Exceptions have been reported - 5,031,342,767 cycles # 2.773 GHz - 10,030,466,689 instructions # 1.99 insn per cycle - 1.815492489 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288181869545951 -Relative difference = 9.214951531400725e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.445722e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.755335e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.755335e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.460163 sec -INFO: No Floating Point Exceptions have been reported - 4,428,510,644 cycles # 1.795 GHz - 8,482,456,603 instructions # 1.92 insn per cycle - 2.468701093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183148950338 -Relative difference = 1.5521108056421764e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 0b6cd11934..828077b7db 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:05:37 -DATE: 2024-10-02_22:52:53 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.102016e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.726185e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.849782e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.483121 sec -INFO: No Floating Point Exceptions have been reported - 2,119,072,326 cycles # 2.979 GHz - 3,036,201,097 instructions # 1.43 insn per cycle - 0.768161183 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.801286e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.142736e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.165759e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.362839 sec +INFO: No Floating Point Exceptions have been reported + 814,301,438 cycles:u # 2.315 GHz (75.31%) + 2,382,034 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.62%) + 8,301,301 stalled-cycles-backend:u # 1.02% backend cycles idle (73.88%) + 1,483,774,354 instructions:u # 1.82 insn per cycle + # 0.01 stalled cycles per insn (75.06%) + 0.419887438 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.582380e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.679265e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.679265e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.152623 sec -INFO: No Floating Point Exceptions have been reported - 12,621,162,156 cycles # 3.035 GHz - 34,636,169,934 instructions # 2.74 insn per cycle - 4.159998956 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.735275e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.868405e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.868405e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.952476 sec +INFO: No Floating Point Exceptions have been reported + 10,149,563,542 cycles:u # 3.428 GHz (74.90%) + 6,976,343 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.89%) + 1,068,495,460 stalled-cycles-backend:u # 10.53% backend cycles idle (74.89%) + 34,540,376,808 instructions:u # 3.40 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 2.965659979 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 762) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.435300e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.931883e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.931883e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.023526 sec -INFO: No Floating Point Exceptions have been reported - 6,181,207,719 cycles # 3.045 GHz - 14,841,948,094 instructions # 2.40 insn per cycle - 2.030877083 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.544250e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.127175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.127175e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.535257 sec +INFO: No Floating Point Exceptions have been reported + 5,181,760,333 cycles:u # 3.358 GHz (74.93%) + 6,568,469 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.12%) + 1,886,815,713 stalled-cycles-backend:u # 36.41% backend cycles idle (75.12%) + 14,556,262,369 instructions:u # 2.81 insn per cycle + # 0.13 stalled cycles per insn (75.12%) + 1.547415442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193755550310 -Relative difference = 1.8511017053446366e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198769558221 +Relative difference = 6.06481491495597e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.506636e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.401228e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.401228e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.488171 sec -INFO: No Floating Point Exceptions have been reported - 4,304,268,264 cycles # 2.880 GHz - 9,097,439,075 instructions # 2.11 insn per cycle - 1.495316579 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4456) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.713207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.063516e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063516e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.224721 sec +INFO: No Floating Point Exceptions have been reported + 4,075,097,190 cycles:u # 3.307 GHz (74.75%) + 7,173,604 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.74%) + 1,586,521,787 stalled-cycles-backend:u # 38.93% backend cycles idle (74.74%) + 8,954,862,198 instructions:u # 2.20 insn per cycle + # 0.18 stalled cycles per insn (74.92%) + 1.238013991 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4429) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182069780305 -Relative difference = 1.0201902325125583e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186736870557 +Relative difference = 1.6083886449260875e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.617162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.560068e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
8.560068e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.470806 sec -INFO: No Floating Point Exceptions have been reported - 4,247,597,214 cycles # 2.875 GHz - 8,690,729,651 instructions # 2.05 insn per cycle - 1.478175129 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4233) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182069780305 -Relative difference = 1.0201902325125583e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.756503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.250884e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.250884e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.915696 sec -INFO: No Floating Point Exceptions have been reported - 3,876,375,719 cycles # 2.017 GHz - 7,836,694,757 instructions # 2.02 insn per cycle - 1.923109061 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183246739209 -Relative difference = 1.6003107281264138e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 99c5f1dd1c..3386f14e63 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:05:46 -DATE: 2024-10-02_22:53:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.190250e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.721947e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.846420e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.485415 sec -INFO: No Floating Point Exceptions have been reported - 2,076,120,147 cycles # 2.913 GHz - 2,915,349,838 instructions # 1.40 insn per cycle - 0.769560564 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.781398e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.112844e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.134629e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 +TOTAL : 0.334605 sec +INFO: No Floating Point Exceptions have been reported + 822,154,607 cycles:u # 2.356 GHz (74.95%) + 2,330,583 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.95%) + 8,335,753 stalled-cycles-backend:u # 1.01% backend cycles idle (75.41%) + 1,482,735,882 instructions:u # 1.80 insn per cycle + # 0.01 stalled cycles per insn (77.15%) + 0.391451760 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.028811e+00 -Avg ME (F77/GPU) = 2.0288499356247485 -Relative difference = 1.9191351362116207e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.028815e+00 +Avg ME (F77/GPU) = 2.0288173687877133 +Relative difference = 1.1675720622806321e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.762044e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.875011e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.875011e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 3.889711 sec -INFO: No Floating Point Exceptions have been reported - 11,863,310,263 cycles # 3.045 GHz - 35,106,472,280 instructions # 2.96 insn per cycle - 3.896935494 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.993924e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.145465e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.145465e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.770210 sec +INFO: No Floating Point Exceptions have been reported + 9,499,901,218 cycles:u # 3.420 GHz (75.05%) + 6,744,579 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.97%) + 7,175,092 stalled-cycles-backend:u # 0.08% backend cycles idle (74.95%) + 34,567,889,085 instructions:u # 3.64 insn per cycle + # 0.00 stalled cycles per insn (74.95%) + 2.782918776 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 434) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED 
] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199094356969 -Relative difference = 4.463890496342449e-08 +Avg ME (F77/C++) = 2.0288199088536203 +Relative difference = 4.4925808981097166e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision 
= FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.629807e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.149090e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.149090e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 1.958719 sec -INFO: No Floating Point Exceptions have been reported - 5,974,407,691 cycles # 3.040 GHz - 14,562,989,936 instructions # 2.44 insn per cycle - 1.965935304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.915685e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.551891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.551891e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 +TOTAL : 1.469689 sec +INFO: No Floating Point Exceptions have been reported + 4,958,077,800 cycles:u # 3.355 GHz (74.94%) + 6,834,456 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.10%) + 1,291,600,051 stalled-cycles-backend:u # 26.05% backend cycles idle (75.10%) + 13,965,595,655 instructions:u # 2.82 insn per cycle + # 0.09 stalled cycles per insn (75.10%) + 1.482492521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2467) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288193583255634 -Relative difference = 1.7661780742548925e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028820e+00 +Avg ME (F77/C++) = 2.0288198892958462 +Relative difference = 5.4565783974899003e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.627487e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.564550e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.564550e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.467639 sec -INFO: No Floating Point Exceptions have been reported - 4,208,313,007 cycles # 2.855 GHz - 8,876,905,434 instructions # 2.11 insn per cycle - 1.474726540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3552) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.034991e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.140334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.140334e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.157361 sec +INFO: No Floating Point Exceptions have been reported + 3,869,701,142 cycles:u # 3.321 GHz (74.69%) + 7,370,047 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.66%) + 1,428,425,272 stalled-cycles-backend:u # 36.91% backend cycles idle (74.89%) + 8,537,033,922 instructions:u # 2.21 insn per cycle + # 0.17 stalled cycles per insn (75.23%) + 1.169337912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3397) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288186836987734 +Relative difference = 1.559041129563128e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.625571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.554690e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.554690e+05 
) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.468279 sec -INFO: No Floating Point Exceptions have been reported - 4,239,649,829 cycles # 2.876 GHz - 8,443,717,794 instructions # 1.99 insn per cycle - 1.475031334 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288182107033208 -Relative difference = 1.0385521077446488e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.780064e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.278902e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.278902e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.909081 sec -INFO: No Floating Point Exceptions have been reported - 3,835,043,638 cycles # 2.002 GHz - 7,729,492,795 instructions # 2.02 insn per cycle - 1.916628169 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028818e+00 -Avg ME (F77/C++) = 2.0288183204829693 -Relative difference = 1.5796536184903122e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 6bbdeeb18d..b4a030267e 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:25:20 -DATE: 2024-10-02_22:25:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.375168e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.358758e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.991650e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.539275 sec -INFO: No Floating Point Exceptions have been reported - 2,197,147,211 cycles # 2.830 GHz - 3,171,133,289 instructions # 1.44 insn per cycle - 0.834260682 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.843910e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.328014e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.346502e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.404179 sec +INFO: No Floating Point Exceptions have been reported + 993,813,076 cycles:u # 2.363 GHz (75.25%) + 2,358,772 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.16%) + 11,114,523 stalled-cycles-backend:u # 1.12% backend cycles idle (73.62%) + 1,620,766,934 instructions:u # 1.63 insn per cycle + # 0.01 stalled cycles per insn (74.22%) + 0.466477700 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.863199e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.911060e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.911060e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.765001 sec -INFO: No Floating Point Exceptions have been reported - 17,514,965,969 cycles # 3.033 GHz - 46,180,069,488 instructions # 2.64 insn per cycle - 5.776213723 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.599453e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.665264e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.665264e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.216177 sec +INFO: No Floating Point Exceptions have been reported + 14,448,775,342 cycles:u # 3.418 GHz (74.95%) + 8,614,204 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) + 3,816,067,915 stalled-cycles-backend:u # 26.41% backend cycles idle (75.02%) + 45,665,454,139 instructions:u # 3.16 insn per cycle + # 0.08 stalled cycles per insn (75.02%) + 4.232344682 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.331354e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503723e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.503723e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.286290 sec -INFO: No Floating Point Exceptions have been reported - 10,049,467,521 cycles # 3.048 GHz - 27,685,234,952 instructions # 2.75 insn per cycle - 3.297791625 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.292500e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.477313e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.477313e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.621716 sec +INFO: No Floating Point Exceptions have been reported + 8,910,589,768 cycles:u # 3.384 GHz (74.88%) + 7,902,507 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.82%) + 2,747,123,853 stalled-cycles-backend:u # 30.83% backend cycles idle (74.96%) + 27,566,692,372 instructions:u # 3.09 insn per cycle + # 0.10 stalled cycles per insn (75.09%) + 2.637286078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.194158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.606158e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.606158e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.162271 sec -INFO: No Floating Point Exceptions have been reported - 6,182,412,740 cycles # 2.845 GHz - 12,592,550,468 instructions # 2.04 insn per cycle - 2.174037680 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2773) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.249154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.909393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.909393e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.634644 sec +INFO: No Floating Point Exceptions have been reported + 5,421,017,013 cycles:u # 3.293 GHz (74.77%) + 8,322,208 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.81%) + 933,804,845 stalled-cycles-backend:u # 17.23% backend cycles idle (75.05%) + 12,257,868,001 instructions:u # 2.26 insn per cycle + # 0.08 stalled cycles per insn (75.22%) + 1.651139869 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.730742e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.240332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.240332e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.970706 sec -INFO: No Floating Point Exceptions have been reported - 5,651,897,158 cycles # 2.853 GHz - 12,026,990,160 instructions # 2.13 insn per cycle - 1.982185993 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2518) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.609905e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.807717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.807717e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.045690 sec -INFO: No Floating Point Exceptions have been reported - 5,750,600,034 cycles # 1.881 GHz - 8,210,466,675 instructions # 1.43 insn per cycle - 3.057406229 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1862) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 532bb9e416..5f04e842f2 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_10:25:32 -DATE: 2024-10-02_22:25:54 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.200313e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.637883e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.154555e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.530194 sec -INFO: No Floating Point Exceptions have been reported - 2,265,001,691 cycles # 2.959 GHz - 3,241,984,092 instructions # 1.43 insn per cycle - 0.823101283 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.864951e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
2.362990e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.381998e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 +TOTAL : 0.403410 sec +INFO: No Floating Point Exceptions have been reported + 1,017,326,099 cycles:u # 2.423 GHz (74.64%) + 2,265,428 stalled-cycles-frontend:u # 0.22% frontend cycles idle (77.25%) + 5,213,166 stalled-cycles-backend:u # 0.51% backend cycles idle (75.81%) + 1,577,279,794 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (74.07%) + 0.463037987 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243874 -Relative difference = 3.241686432649386e-07 +Avg ME (F77/GPU) = 2.0288063423243869 +Relative difference = 3.241686434838304e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.918727e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.970297e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.970297e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.603990 sec -INFO: No Floating Point Exceptions have been reported - 17,066,108,883 cycles # 3.040 GHz - 45,206,022,775 instructions # 2.65 insn per cycle - 5.614933216 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.600556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.665048e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.665048e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 4.211548 sec +INFO: No Floating Point Exceptions have been reported + 14,429,603,779 cycles:u # 3.417 GHz (75.00%) + 9,191,990 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 3,203,701,294 stalled-cycles-backend:u # 22.20% backend cycles idle (75.00%) + 44,592,650,458 instructions:u # 3.09 insn per cycle + # 0.07 stalled cycles per insn (75.01%) + 4.227847419 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.464266e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.650227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.650227e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.167234 sec -INFO: No Floating Point Exceptions have been reported - 9,655,586,507 cycles # 3.039 GHz - 26,360,660,752 instructions # 2.73 insn per cycle - 3.178764330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.624886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.841031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.841031e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.447870 sec +INFO: No Floating Point Exceptions have been reported + 8,253,818,162 cycles:u # 3.356 GHz (74.96%) + 9,137,802 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.95%) + 1,274,394,716 stalled-cycles-backend:u # 15.44% backend cycles idle (74.98%) + 26,416,039,672 instructions:u # 3.20 insn per cycle + # 0.05 stalled cycles per insn (74.98%) + 2.463520948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2312) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.662113e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.998348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.998348e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.391394 sec -INFO: No Floating Point Exceptions have been reported - 6,882,477,617 cycles # 2.865 GHz - 14,143,328,395 instructions # 2.05 insn per cycle - 2.403055690 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.491923e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.903415e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.903415e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 1.794681 sec +INFO: No Floating Point Exceptions have been reported + 5,990,026,085 cycles:u # 3.317 GHz (74.86%) + 8,838,657 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.79%) + 1,793,680,422 stalled-cycles-backend:u # 29.94% backend cycles idle (74.80%) + 13,981,160,283 instructions:u # 2.33 insn per cycle + # 0.13 stalled cycles per insn (75.02%) + 1.810757952 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 +Avg ME (F77/C++) = 2.0288063930599014 +Relative difference = 2.9916108265801754e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.883189e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.244684e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.244684e+05 ) sec^-1 -MeanMatrixElemValue = 
( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.286437 sec -INFO: No Floating Point Exceptions have been reported - 6,540,751,339 cycles # 2.848 GHz - 13,628,461,172 instructions # 2.08 insn per cycle - 2.297769147 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.798205e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.010852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.010852e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.903935 sec -INFO: No Floating Point Exceptions have been reported - 5,730,017,108 cycles # 1.966 GHz - 9,320,315,455 instructions # 1.63 insn per cycle - 2.915703363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288064057068964 -Relative difference = 2.9292737240031234e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 2c8152e371..4790fed1f8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:25:44 -DATE: 2024-10-02_22:26:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.471156e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.836503e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.949285e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.478957 sec -INFO: No Floating Point Exceptions have been reported - 1,977,748,469 cycles # 2.835 GHz - 2,830,254,496 instructions # 1.43 insn per cycle - 0.755464456 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.443417e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546632e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.548481e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.431395 sec +INFO: No Floating Point Exceptions have been reported + 1,223,660,729 cycles:u # 2.802 GHz (75.64%) + 2,501,735 stalled-cycles-frontend:u # 0.20% frontend cycles idle (76.57%) + 10,246,254 stalled-cycles-backend:u # 0.84% backend cycles idle (75.46%) + 1,631,958,396 instructions:u # 1.33 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 0.483940074 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.039116e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.228066e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.239026e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.611145 sec -INFO: No Floating Point Exceptions have been reported - 2,507,647,227 cycles # 2.935 GHz - 3,822,892,757 instructions # 1.52 insn per cycle 
- 0.913494944 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.548785e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.673922e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.676381e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.714706 sec +INFO: No Floating Point Exceptions have been reported + 2,077,515,725 cycles:u # 2.828 GHz (74.26%) + 2,536,517 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.13%) + 8,256,797 stalled-cycles-backend:u # 0.40% backend cycles idle (74.50%) + 2,472,016,862 instructions:u # 1.19 insn per cycle + # 0.00 stalled cycles per insn (75.70%) + 0.775335817 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.499122e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.511257e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.511257e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.576067 sec -INFO: No Floating Point Exceptions have been reported - 19,987,276,024 cycles # 3.038 GHz - 59,914,208,905 instructions # 3.00 insn per cycle - 6.580288357 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.371112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386501e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386501e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.884354 sec +INFO: No Floating Point Exceptions have been reported + 17,047,975,815 cycles:u # 3.488 GHz (74.96%) + 2,450,342 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 3,444,330,788 stalled-cycles-backend:u # 20.20% backend cycles idle (74.96%) + 56,934,701,049 instructions:u # 3.34 insn per cycle + # 0.06 stalled cycles per insn (74.97%) + 4.892198702 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.746815e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.790146e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.790146e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.470619 sec -INFO: No Floating Point Exceptions have been reported - 10,568,573,836 cycles # 3.042 GHz - 31,084,482,719 instructions # 2.94 insn per cycle - 3.474810942 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.558448e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.616297e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.616297e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.522045 sec +INFO: No Floating Point Exceptions have been reported + 8,801,419,969 cycles:u # 3.486 GHz (75.01%) + 1,985,953 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) + 1,760,948,190 stalled-cycles-backend:u # 20.01% backend cycles idle (74.97%) + 29,935,355,243 instructions:u # 3.40 insn per cycle + # 0.06 stalled cycles per insn (74.97%) + 2.529878750 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP 
[gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.452682e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.618975e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.618975e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.752355 sec -INFO: No Floating Point Exceptions have been reported - 4,998,647,040 cycles # 2.847 GHz - 11,404,728,427 instructions # 2.28 insn per cycle - 1.756553925 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.328649e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.353089e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.353089e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.257226 sec +INFO: No Floating Point Exceptions have been reported + 4,393,002,412 cycles:u # 3.486 GHz (74.66%) + 2,099,409 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.66%) + 1,148,579,182 stalled-cycles-backend:u # 26.15% backend cycles idle (74.96%) + 11,105,205,332 instructions:u # 2.53 insn per cycle + # 0.10 stalled cycles per insn (75.23%) + 1.264599738 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.066971e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088589e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088589e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.554927 sec -INFO: No Floating Point Exceptions have been reported - 4,438,094,520 cycles # 2.847 GHz - 10,663,641,043 instructions # 2.40 insn per cycle - 1.559324939 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.520624e+04 ) sec^-1 
-EvtsPerSec[MatrixElems] (3) = ( 7.626785e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.626785e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.200273 sec -INFO: No Floating Point Exceptions have been reported - 4,124,597,483 cycles # 1.872 GHz - 5,971,571,779 instructions # 1.45 insn per cycle - 2.204632407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference 
<= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 74c8e6c686..ddc33c0955 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_11:14:57 -DATE: 2024-10-02_23:01:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.545911e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.255095e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.255095e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.500354 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,107,574,632 cycles # 2.945 GHz - 3,182,291,906 instructions # 1.51 insn per cycle - 0.772902799 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.225611e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.530645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.530645e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 0.600089 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,780,961,964 cycles:u # 2.990 GHz (74.01%) + 6,588,994 stalled-cycles-frontend:u # 0.37% frontend cycles idle (76.14%) + 279,320,328 stalled-cycles-backend:u # 15.68% backend cycles idle (76.49%) + 2,180,914,415 instructions:u # 1.22 insn per cycle + # 0.13 stalled cycles per insn (75.80%) + 0.651924943 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.654170e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.373478e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.373478e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.843085 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,201,455,709 cycles # 2.923 GHz - 5,064,301,689 instructions # 1.58 insn per cycle - 1.157821824 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.811687e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.611689e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.611689e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 +TOTAL : 1.363596 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,111,137,981 cycles:u # 2.990 GHz (74.47%) + 16,125,606 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.61%) + 835,955,669 stalled-cycles-backend:u # 20.33% backend cycles idle (74.98%) + 4,214,779,200 instructions:u # 1.03 insn per cycle + # 0.20 stalled cycles per insn (74.98%) + 1.436651722 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 
256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.519976e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.532732e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.532732e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.529594 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,914,538,030 cycles # 3.049 GHz - 59,920,714,356 instructions # 3.01 insn per cycle - 6.534061095 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.374363e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.389785e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.389785e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.883881 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,104,860,522 cycles:u # 3.500 GHz (74.96%) + 2,428,687 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 3,612,152,787 stalled-cycles-backend:u # 21.12% backend cycles idle (74.96%) + 56,962,728,913 instructions:u # 3.33 insn per cycle + # 0.06 stalled cycles per insn (74.96%) + 4.891525423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.734084e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.778629e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.778629e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.488369 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,606,558,779 cycles # 3.037 GHz - 31,134,023,580 instructions # 2.94 insn per cycle - 3.492950294 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.582531e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.640749e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.640749e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.517173 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,819,312,790 cycles:u # 3.499 GHz (74.95%) + 2,247,704 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.93%) + 1,784,640,885 stalled-cycles-backend:u # 20.24% backend cycles idle (74.93%) + 29,976,004,853 instructions:u # 3.40 insn per cycle + # 0.06 stalled cycles per insn (74.93%) + 2.524864599 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432429 +Relative difference = 4.4692302371173303e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.451546e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.625575e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.625575e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.760502 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,040,359,107 cycles # 2.857 GHz - 11,455,585,139 instructions # 2.27 insn per cycle - 1.764980096 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.328730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.352996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.352996e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.260923 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,408,283,040 cycles:u # 3.487 GHz (74.70%) + 2,316,722 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.74%) + 1,152,356,331 stalled-cycles-backend:u # 26.14% backend cycles idle (75.06%) + 
11,138,060,442 instructions:u # 2.53 insn per cycle + # 0.10 stalled cycles per insn (75.33%) + 1.268502762 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.064061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.085709e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.085709e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.566477 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,478,283,811 cycles # 2.852 GHz - 10,714,144,344 instructions # 2.39 insn per cycle - 1.571016295 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.519249e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.630304e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.630304e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.208574 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,162,288,033 cycles # 1.882 GHz - 6,009,903,592 instructions # 1.44 insn per cycle - 2.213156087 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 2504d6cb2f..c1e0e45788 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:25:58 -DATE: 2024-10-02_22:26:44 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.573081e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.880652e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.992912e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473448 sec -INFO: No Floating Point Exceptions have been reported - 1,997,107,285 cycles # 2.887 GHz - 2,802,455,481 instructions # 1.40 insn per cycle - 0.748795790 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.465238e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.566482e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.568358e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.418222 sec +INFO: No Floating Point Exceptions have been reported + 1,183,474,852 cycles:u # 2.738 GHz (75.76%) + 2,497,591 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.74%) + 4,803,862 stalled-cycles-backend:u # 0.41% backend cycles idle (75.22%) + 1,692,488,285 instructions:u # 1.43 insn per cycle + # 0.00 stalled cycles per insn (74.30%) + 0.470754483 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.042916e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233761e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244311e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.612101 sec -INFO: No Floating Point Exceptions have been reported - 2,523,217,642 cycles # 2.962 GHz - 3,820,710,011 instructions # 1.51 insn per cycle - 0.913471570 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.554225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.680598e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.683051e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.710862 sec +INFO: No Floating Point Exceptions have been reported + 2,017,250,246 cycles:u # 2.760 GHz (75.44%) + 2,412,871 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.92%) + 6,852,730 stalled-cycles-backend:u # 0.34% backend cycles idle (75.94%) + 2,437,580,973 instructions:u # 1.21 insn per cycle + # 0.00 stalled cycles per insn (75.64%) + 0.774920863 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418649 -Relative difference = 4.469239988637851e-07 +Avg ME (F77/GPU) = 1.4131213684418644 +Relative difference = 4.469239991780462e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.478144e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.490358e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.490358e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.631814 sec -INFO: No Floating Point Exceptions have been reported - 19,904,693,493 cycles # 3.001 GHz - 60,129,356,320 instructions # 3.02 insn per cycle - 6.635977885 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.535419e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.552435e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.552435e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.658100 sec +INFO: No Floating Point Exceptions have been reported + 16,269,519,657 cycles:u # 3.490 GHz (74.96%) + 2,449,188 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) + 3,788,080,771 stalled-cycles-backend:u # 23.28% backend cycles idle (74.94%) + 56,645,841,981 instructions:u # 3.48 insn per cycle + # 0.07 stalled cycles per insn (74.93%) + 4.665223880 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 924) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432427 +Relative difference = 4.4692302386886357e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.788891e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.832354e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.832354e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.440533 sec -INFO: No Floating Point Exceptions have been reported - 10,474,336,033 cycles # 3.041 GHz - 30,686,738,264 instructions # 2.93 insn per cycle - 3.444912048 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.323977e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.378429e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.378429e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.614471 sec +INFO: No Floating Point Exceptions have been reported + 9,147,034,129 cycles:u # 3.495 GHz (74.94%) + 2,026,594 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) + 2,648,071,523 stalled-cycles-backend:u # 28.95% backend cycles idle (74.94%) + 30,366,242,847 instructions:u # 3.32 insn per cycle + # 0.09 stalled cycles per insn (74.94%) + 2.621658552 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4697) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432433 -Relative difference = 4.46923023397472e-07 +Avg ME (F77/C++) = 1.4131213684432431 +Relative difference = 4.4692302355460254e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.260057e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.421960e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.421960e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.788469 sec -INFO: No Floating Point Exceptions have been reported - 5,127,771,337 cycles # 2.862 GHz - 11,838,347,484 instructions # 2.31 insn per cycle - 1.792570031 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4746) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.233547e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.254406e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254406e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.351825 sec +INFO: No Floating Point Exceptions have been reported + 4,729,834,556 cycles:u # 3.491 GHz (74.66%) + 1,846,450 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.73%) + 1,487,747,818 stalled-cycles-backend:u # 31.45% backend cycles idle (75.02%) + 11,735,041,331 instructions:u # 2.48 insn per cycle + # 0.13 stalled cycles per insn (75.20%) + 1.358925233 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 
4465) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.006530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.025807e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.025807e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.647024 sec -INFO: No Floating Point Exceptions have been reported - 4,720,484,931 cycles # 2.860 GHz - 11,163,899,176 instructions # 2.36 insn per cycle - 1.651308834 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 246) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416466 -Relative difference = 4.469241533230934e-07 -OK (relative difference <= 5E-3) -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.518189e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.624521e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.624521e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.200607 sec -INFO: No Floating Point Exceptions have been reported - 4,154,063,919 cycles # 1.885 GHz - 6,222,924,057 instructions # 1.50 insn per cycle - 2.204886027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1516) (512y: 139) (512z: 3679) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 OK (relative difference <= 5E-3) ========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index e312f04d1e..90704b15e2 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:26:11 -DATE: 2024-10-02_22:27:09 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.675849e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.049912e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.089991e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.458226 sec -INFO: No Floating Point Exceptions have been reported - 1,987,161,645 cycles # 2.947 GHz - 2,815,757,381 instructions # 1.42 insn per cycle - 0.732664597 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.186904e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.694908e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.703402e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 +TOTAL : 0.357486 sec +INFO: No Floating Point Exceptions have been reported + 949,950,868 cycles:u # 2.571 GHz (74.71%) + 2,550,775 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.27%) + 5,518,485 stalled-cycles-backend:u # 0.58% backend cycles idle (73.93%) + 1,513,211,065 instructions:u # 1.59 insn per cycle + # 0.00 stalled cycles per insn (74.28%) + 0.411426189 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.675349e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.381609e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.425889e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.509054 sec -INFO: No Floating Point Exceptions have been reported - 2,180,524,483 cycles # 2.942 GHz - 3,107,964,411 instructions # 1.43 insn per cycle - 0.800068245 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.009358e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.058985e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.073606e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.519372 sec +INFO: No Floating Point Exceptions have been reported + 1,488,011,322 cycles:u # 2.781 GHz (75.20%) + 2,501,614 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.20%) + 5,177,253 stalled-cycles-backend:u # 0.35% backend cycles idle (74.20%) + 1,942,025,908 instructions:u # 1.31 insn per cycle + # 0.00 stalled cycles per insn (73.48%) + 0.575349084 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.601007e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.614246e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.614246e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.317543 sec -INFO: No Floating Point Exceptions have been reported - 19,251,894,030 cycles # 3.046 GHz - 59,613,754,091 instructions # 3.10 insn per cycle - 6.321648054 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.700255e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.719603e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.719603e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.450045 sec +INFO: No Floating Point Exceptions have been reported + 15,564,829,810 cycles:u # 3.496 GHz (74.94%) + 1,889,922 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) + 2,435,487,135 stalled-cycles-backend:u # 15.65% backend cycles idle (75.03%) + 56,541,733,242 instructions:u # 3.63 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 4.457091697 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859809517598 +Relative difference = 1.3480841507557613e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.351291e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.489859e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.489859e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.978919 sec -INFO: No Floating Point Exceptions have been reported - 6,013,687,882 cycles # 3.034 GHz - 17,062,971,129 instructions # 2.84 insn per cycle - 1.983047133 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.147844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.166747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166747e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.449197 sec +INFO: No Floating Point Exceptions have been reported + 5,067,109,496 cycles:u # 3.490 GHz (74.67%) + 1,374,641 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.81%) + 1,585,658,388 stalled-cycles-backend:u # 31.29% backend cycles idle (75.09%) + 16,235,790,558 instructions:u # 3.20 insn per cycle + # 0.10 stalled cycles per insn (75.21%) + 1.456282420 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
5124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857731430207 +Relative difference = 1.6055147002442227e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.804689e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.868315e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.868315e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.925391 sec -INFO: No Floating Point Exceptions have been reported - 2,640,566,333 cycles # 2.843 GHz - 6,187,446,358 instructions # 2.34 insn per cycle - 0.929575730 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.476791e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.563653e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.563653e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.683006 sec +INFO: No Floating Point Exceptions have been reported + 2,394,357,438 cycles:u # 3.491 GHz (74.61%) + 1,766,897 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.36%) + 742,313,096 stalled-cycles-backend:u # 31.00% backend cycles idle (74.44%) + 6,040,131,133 instructions:u # 2.52 insn per cycle + # 0.12 stalled cycles per insn (75.02%) + 0.690178868 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.998130e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.078369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.078369e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.837375 sec -INFO: No Floating Point Exceptions have been reported - 2,403,180,656 cycles # 2.859 GHz - 5,790,065,517 instructions # 2.41 insn per cycle - 0.841354194 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.523426e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.570346e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.570346e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.095188 sec -INFO: No Floating Point Exceptions have 
been reported - 2,074,566,855 cycles # 1.888 GHz - 3,391,536,157 instructions # 1.63 insn per cycle - 1.099528954 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index 316a025050..c796d650cd 100644 --- 
a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_11:15:12 -DATE: 2024-10-02_23:02:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.524999e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.496444e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.496444e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 -TOTAL : 0.466645 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,011,613,909 cycles # 2.942 GHz - 2,949,378,989 instructions # 1.47 insn per cycle - 0.740958646 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.313066e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.769718e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.769718e+06 ) sec^-1 +MeanMatrixElemValue = ( 4.755508e+02 +- 2.671054e+02 ) GeV^-2 +TOTAL : 0.510844 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,532,392,567 cycles:u # 2.908 GHz (75.38%) + 10,395,405 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.22%) + 255,461,743 stalled-cycles-backend:u # 16.67% backend cycles idle (74.22%) + 1,965,347,850 instructions:u # 1.28 insn per cycle + # 0.13 stalled cycles per insn (73.19%) + 0.559680752 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.680079e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.266918e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.266918e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 -TOTAL : 0.645054 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,639,460,011 cycles # 2.993 GHz - 4,010,655,501 instructions # 1.52 insn per cycle - 0.939491422 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.573134e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.558732e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.558732e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.855939e+03 +- 1.791987e+03 ) GeV^-2 +TOTAL : 1.126462 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,475,500,716 cycles:u # 3.024 GHz (75.07%) + 29,682,134 stalled-cycles-frontend:u # 0.85% frontend cycles idle (74.64%) + 835,431,380 stalled-cycles-backend:u # 24.04% backend cycles idle (74.68%) + 3,788,788,425 instructions:u # 1.09 insn per cycle + # 0.22 stalled cycles per insn (74.77%) + 1.185344790 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.574010e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.587324e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.587324e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.387615 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,269,777,585 cycles # 3.015 GHz - 59,617,998,643 instructions # 3.09 insn per cycle - 6.391840570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.713043e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.732413e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.732413e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.436399 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,517,029,137 cycles:u # 3.495 GHz (74.95%) + 2,415,118 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.95%) + 2,404,612,148 stalled-cycles-backend:u # 15.50% backend cycles idle (74.99%) + 56,652,779,707 instructions:u # 3.65 insn per cycle + # 0.04 stalled cycles per insn (75.08%) + 4.443610629 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1190) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859809517598 +Relative difference = 1.3480841507557613e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.399391e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.540572e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.540572e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.972149 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,029,722,967 cycles # 3.052 GHz - 17,109,872,648 instructions # 2.84 insn per cycle - 1.976404451 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.155179e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.174142e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.174142e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.442917 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,052,789,110 cycles:u # 3.495 GHz (74.89%) + 1,393,507 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.10%) + 1,543,898,572 stalled-cycles-backend:u # 30.56% backend cycles idle (75.11%) + 
16,257,391,621 instructions:u # 3.22 insn per cycle + # 0.09 stalled cycles per insn (75.11%) + 1.450066534 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129857731430207 +Relative difference = 
1.6055147002442227e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.805556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.869603e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.869603e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.929046 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,658,179,637 cycles # 2.850 GHz - 6,224,135,366 instructions # 2.34 insn per cycle - 0.933362485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.467698e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.553341e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.553341e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.687943 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,390,771,196 cycles:u # 3.460 GHz (74.53%) + 1,674,608 
stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.53%) + 739,949,017 stalled-cycles-backend:u # 30.95% backend cycles idle (74.87%) + 6,072,314,919 instructions:u # 2.54 insn per cycle + # 0.12 stalled cycles per insn (75.45%) + 0.695001619 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.997018e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.074315e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.074315e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.841770 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,421,588,452 cycles # 2.865 GHz - 5,827,320,634 instructions # 2.41 insn per cycle - 0.845895734 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.537158e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.584935e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.584935e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.089934 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,096,708,167 cycles # 1.917 GHz - 3,432,903,656 instructions # 1.64 insn per cycle - 1.094288094 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index a72633a312..8ec9721fb6 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:26:22 -DATE: 2024-10-02_22:27:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.649129e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.022553e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.063512e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.453345 sec -INFO: No Floating Point Exceptions have been reported - 1,975,862,611 cycles # 2.945 GHz - 2,757,171,653 instructions # 1.40 insn per cycle - 0.728260674 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 226 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.331902e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.830401e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.838695e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 +TOTAL : 0.354115 sec +INFO: No Floating Point Exceptions have been reported + 934,588,835 cycles:u # 2.542 GHz (76.62%) + 2,338,444 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.53%) + 9,513,633 stalled-cycles-backend:u # 1.02% backend cycles idle (71.64%) + 1,554,664,984 instructions:u # 1.66 insn per cycle + # 0.01 stalled cycles per insn (74.42%) + 0.406943143 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.669823e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.371781e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.417808e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.507852 sec -INFO: No Floating Point Exceptions have been reported - 2,173,149,896 cycles # 2.944 GHz - 3,150,374,983 instructions # 1.45 insn per cycle - 
0.795545558 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.749540e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.744497e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.759194e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 +TOTAL : 0.514411 sec +INFO: No Floating Point Exceptions have been reported + 1,416,473,483 cycles:u # 2.667 GHz (75.74%) + 2,369,760 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.50%) + 9,030,285 stalled-cycles-backend:u # 0.64% backend cycles idle (75.36%) + 1,905,808,052 instructions:u # 1.35 insn per cycle + # 0.00 stalled cycles per insn (75.20%) + 0.571806967 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.412607e+00 -Avg ME (F77/GPU) = 1.4132214305330990 -Relative difference = 0.0004349621183379836 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.412410e+00 +Avg ME (F77/GPU) = 1.4131674300257941 +Relative difference = 0.0005362678158567296 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.581112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.594237e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.594237e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.365783 sec -INFO: No Floating Point Exceptions have been reported - 19,419,491,454 cycles # 3.049 GHz - 59,350,763,877 instructions # 3.06 insn per cycle - 6.369878540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.764713e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784565e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784565e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 +TOTAL : 4.373695 sec +INFO: No Floating Point Exceptions have been reported + 15,305,777,282 cycles:u # 3.497 GHz (74.97%) + 1,857,464 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) + 2,679,974,053 stalled-cycles-backend:u # 17.51% backend cycles idle (74.96%) + 56,406,318,615 instructions:u # 3.69 insn per cycle + # 0.05 stalled cycles per insn (74.96%) + 4.380804919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129949096991936 -Relative difference = 6.390737857384068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129859511640177 +Relative difference = 3.456225494743424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.722765e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.878130e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.878130e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.895285 sec -INFO: No Floating Point Exceptions have been reported - 5,768,191,166 cycles # 3.038 GHz - 16,850,391,369 instructions # 2.92 insn per cycle - 1.899458861 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.148887e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168173e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.168173e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 +TOTAL : 1.447872 sec +INFO: No Floating Point Exceptions have been reported + 5,058,532,173 cycles:u # 3.487 GHz (74.79%) + 2,559,973 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.07%) + 1,503,389,024 stalled-cycles-backend:u # 29.72% backend cycles idle (75.19%) + 16,330,983,548 instructions:u # 3.23 insn per cycle + # 0.09 stalled cycles per insn (75.19%) + 1.454792272 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5045) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412995e+00 -Avg ME (F77/C++) = 1.4129954647353316 -Relative difference = 3.2890090308261873e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412986e+00 +Avg ME (F77/C++) = 1.4129858306637857 +Relative difference = 1.1984281117008586e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.566708e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.614620e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.614620e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.063083 sec -INFO: No Floating Point Exceptions have been reported - 3,015,561,521 cycles # 2.827 GHz - 6,848,133,630 instructions # 2.27 insn per cycle - 1.067048166 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.142260e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.206581e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.206581e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 +TOTAL : 0.786409 sec +INFO: No Floating Point Exceptions have been reported + 2,741,188,065 cycles:u # 3.473 GHz (74.67%) + 1,874,301 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.67%) + 828,334,786 stalled-cycles-backend:u # 30.22% backend cycles idle (74.67%) + 6,730,777,833 instructions:u # 2.46 insn per cycle + # 0.12 stalled cycles per insn (74.96%) + 0.793478400 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5386) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133162101620087 +Relative difference = 1.4870135814264702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
-EvtsPerSec[Rmb+ME] (23) = ( 1.699136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.754996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.754996e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.981580 sec -INFO: No Floating Point Exceptions have been reported - 2,791,734,989 cycles # 2.834 GHz - 6,437,581,289 instructions # 2.31 insn per cycle - 0.985661400 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 23) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413313e+00 -Avg ME (F77/C++) = 1.4133132969790267 -Relative difference = 2.1012969292986113e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.392917e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.431841e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.431841e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.195865 sec -INFO: No Floating Point Exceptions have been reported - 2,253,891,023 cycles # 1.880 GHz - 3,755,508,897 instructions # 1.67 insn per cycle - 1.200023887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 28) (512z: 4084) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133164033579249 -Relative difference = 2.85398258307829e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 0b1d518f1a..be15d7acf8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:26:33 -DATE: 2024-10-02_22:27:51 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.453948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.811550e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.927121e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.473105 sec -INFO: No Floating Point Exceptions have been reported - 2,033,581,083 cycles # 2.945 GHz - 2,886,020,774 instructions # 1.42 insn per cycle - 0.747799818 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.446821e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.550657e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.552436e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.420211 sec +INFO: No Floating Point Exceptions have been reported + 1,217,917,962 cycles:u # 2.805 GHz (75.50%) + 2,572,917 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.70%) + 5,684,122 stalled-cycles-backend:u # 0.47% backend cycles idle (75.40%) + 1,664,013,530 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (75.24%) + 0.472252045 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.031801e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.220510e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.231086e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.618243 sec -INFO: No Floating Point Exceptions have been reported - 2,476,239,534 cycles # 2.865 GHz - 3,788,069,315 instructions # 1.53 insn per cycle - 
0.921690466 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.569439e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.691928e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.694396e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.714700 sec +INFO: No Floating Point Exceptions have been reported + 2,021,505,861 cycles:u # 2.755 GHz (75.85%) + 2,526,037 stalled-cycles-frontend:u # 0.12% frontend cycles idle (76.04%) + 10,206,457 stalled-cycles-backend:u # 0.50% backend cycles idle (75.40%) + 2,379,846,608 instructions:u # 1.18 insn per cycle + # 0.00 stalled cycles per insn (75.47%) + 0.779298099 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.460583e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.472611e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.472611e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.679183 sec -INFO: No Floating Point Exceptions have been reported - 20,182,288,201 cycles # 3.020 GHz - 60,947,365,488 instructions # 3.02 insn per cycle - 6.683352736 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.367591e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.382854e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.382854e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.889428 sec +INFO: No Floating Point Exceptions have been reported + 17,111,064,706 cycles:u # 3.498 GHz (75.00%) + 2,437,133 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 3,998,176,859 stalled-cycles-backend:u # 23.37% backend cycles idle (74.98%) + 57,731,287,493 instructions:u # 3.37 insn per cycle + # 0.07 stalled cycles per insn (74.98%) + 4.896906963 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1219) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.800189e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.844205e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.844205e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.432628 sec -INFO: No Floating Point Exceptions have been reported - 10,469,819,938 cycles # 3.047 GHz - 30,821,820,054 instructions # 2.94 insn per cycle - 3.436918127 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.454121e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.510241e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.510241e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.562565 sec +INFO: No Floating Point Exceptions have been reported + 8,978,187,022 cycles:u # 3.500 GHz (74.87%) + 395,957 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) + 2,249,240,551 stalled-cycles-backend:u # 25.05% backend cycles idle (75.06%) + 29,645,099,918 instructions:u # 3.30 insn per cycle + # 0.08 stalled cycles per insn (75.06%) + 2.569887817 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4755) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.488717e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.659662e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.659662e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.746217 sec -INFO: No Floating Point Exceptions have been reported - 4,956,337,420 cycles # 2.833 GHz - 11,358,030,238 instructions # 2.29 insn per cycle - 1.750493549 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.337143e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.361759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.361759e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.248762 sec +INFO: No Floating Point Exceptions have been reported + 4,378,842,882 cycles:u # 3.498 GHz (74.86%) + 1,908,946 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.08%) + 1,206,536,527 stalled-cycles-backend:u # 27.55% backend cycles idle (75.08%) + 11,042,976,514 instructions:u # 2.52 insn per cycle + # 0.11 stalled cycles per insn (75.08%) + 1.256226988 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4405) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.087485e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109461e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109461e+05 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.526196 sec -INFO: No Floating Point Exceptions have been reported - 4,378,050,988 cycles # 2.862 GHz - 10,608,750,677 instructions # 2.42 insn per cycle - 1.530411654 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 84) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 
10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.342670e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.443900e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.443900e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.253273 sec -INFO: No Floating Point Exceptions have been reported - 4,230,871,375 cycles # 1.875 GHz - 6,168,087,523 instructions # 1.46 insn per cycle - 2.257413172 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2143) (512y: 116) (512z: 3653) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index e4a40e8315..dc83255293 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +DATE: 2024-10-04_10:26:47 -DATE: 2024-10-02_22:28:16 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.542800e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.917661e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.043581e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.476455 sec -INFO: No Floating Point Exceptions have been reported - 2,040,505,669 cycles # 2.943 GHz - 2,877,681,232 instructions # 1.41 insn per cycle - 0.752591733 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": 
launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.437038e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.540938e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.542765e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 +TOTAL : 0.419292 sec +INFO: No Floating Point Exceptions have been reported + 1,183,272,557 cycles:u # 2.737 GHz (75.83%) + 2,546,572 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.90%) + 5,185,977 stalled-cycles-backend:u # 0.44% backend cycles idle (74.50%) + 1,665,315,705 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (74.52%) + 0.470888876 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.038811e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.230331e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241436e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.611030 sec -INFO: No Floating Point Exceptions have been reported - 2,506,600,773 cycles # 2.949 GHz - 3,681,760,020 instructions # 1.47 insn per cycle - 
0.910379508 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.552249e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.674201e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.676662e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 +TOTAL : 0.706152 sec +INFO: No Floating Point Exceptions have been reported + 2,033,734,755 cycles:u # 2.796 GHz (76.00%) + 2,524,051 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.21%) + 10,374,444 stalled-cycles-backend:u # 0.51% backend cycles idle (73.94%) + 2,447,976,048 instructions:u # 1.20 insn per cycle + # 0.00 stalled cycles per insn (73.55%) + 0.767744463 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569487 -Relative difference = 4.418889885423659e-07 +Avg ME (F77/GPU) = 1.4131213755569483 +Relative difference = 4.4188898885662695e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.449767e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.461764e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.461764e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.708236 sec -INFO: No Floating Point Exceptions have been reported - 20,306,339,981 cycles # 3.026 GHz - 61,171,716,860 instructions # 3.01 insn per cycle - 6.712534448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.503618e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.520129e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520129e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 4.700248 sec +INFO: No Floating Point Exceptions have been reported + 16,447,289,759 cycles:u # 3.497 GHz (75.01%) + 2,477,835 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) + 3,270,078,877 stalled-cycles-backend:u # 19.88% backend cycles idle (75.00%) + 57,493,893,321 instructions:u # 3.50 insn per cycle + # 0.06 stalled cycles per insn (75.00%) + 4.710930850 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 866) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.866725e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.912249e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.912249e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.385607 sec -INFO: No Floating Point Exceptions have been reported - 10,321,183,247 cycles # 3.045 GHz - 30,532,396,911 instructions # 2.96 insn per cycle - 3.389791787 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.700429e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.760941e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.760941e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 2.468976 sec +INFO: No Floating Point Exceptions have been reported + 8,641,928,544 cycles:u # 3.496 GHz (74.81%) + 2,103,592 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) + 1,771,646,315 stalled-cycles-backend:u # 20.50% backend cycles idle (75.06%) + 30,122,551,249 instructions:u # 3.49 insn per cycle + # 0.06 stalled cycles per insn (75.08%) + 2.476571876 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4834) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.169860e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.331537e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.331537e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.806172 sec -INFO: No Floating Point Exceptions have been reported - 5,142,039,126 cycles # 2.841 GHz - 11,872,343,877 instructions # 2.31 insn per cycle - 1.810450515 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.248849e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270219e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270219e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 +TOTAL : 1.335543 sec +INFO: No Floating Point Exceptions have been reported + 4,669,165,070 cycles:u # 3.488 GHz (74.90%) + 2,234,864 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.90%) + 1,490,886,221 stalled-cycles-backend:u # 31.93% backend cycles idle (74.90%) + 11,673,442,224 instructions:u # 2.50 insn per cycle + # 0.13 stalled cycles per insn (74.90%) + 1.342804791 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4625) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.017735e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.037222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.037222e+05 ) sec^-1 
-MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.629135 sec -INFO: No Floating Point Exceptions have been reported - 4,678,302,214 cycles # 2.865 GHz - 11,166,912,050 instructions # 2.39 insn per cycle - 1.633419328 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 239) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213600217192 -Relative difference = 4.5288254008796884e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 
256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.334630e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.438622e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.438622e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.255377 sec -INFO: No Floating Point Exceptions have been reported - 4,246,914,613 cycles # 1.880 GHz - 6,410,235,153 instructions # 1.51 insn per cycle - 2.259677657 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 162) (512z: 3731) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213786174055 -Relative difference = 4.3972324717191576e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 93a6bfaa86..e3e0c6693f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:27:00 -DATE: 2024-10-02_22:28:41 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.315412e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.344135e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.346271e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536787 sec -INFO: No Floating Point Exceptions have been reported - 2,272,867,740 cycles # 2.957 GHz - 3,556,184,244 instructions # 1.56 insn per cycle - 0.829093650 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.208150e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259226e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.624262 sec +INFO: No Floating Point Exceptions have been reported + 1,851,381,223 cycles:u # 2.964 GHz (74.24%) + 2,899,022 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.80%) + 28,953,261 stalled-cycles-backend:u # 1.56% backend cycles idle (74.16%) + 2,071,880,732 instructions:u # 1.12 insn per cycle + # 0.01 stalled cycles per insn (74.39%) + 0.676502748 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.139015e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.169154e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.170337e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 
5.002651e+00 ) GeV^-4 -TOTAL : 3.043985 sec -INFO: No Floating Point Exceptions have been reported - 9,922,374,295 cycles # 3.004 GHz - 22,624,836,598 instructions # 2.28 insn per cycle - 3.359970198 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.807571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.813781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.813898e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.039224 sec +INFO: No Floating Point Exceptions have been reported + 20,636,709,348 cycles:u # 3.405 GHz (75.16%) + 3,160,012 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) + 8,104,094 stalled-cycles-backend:u # 0.04% backend cycles idle (74.82%) + 18,528,863,482 instructions:u # 0.90 insn per cycle + # 0.00 stalled cycles per insn (74.79%) + 6.105402981 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.936959e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.937903e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.937903e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.473447 sec -INFO: No Floating Point Exceptions have been reported - 25,631,294,284 cycles # 3.024 GHz - 78,955,065,792 instructions # 3.08 insn per cycle - 8.477634665 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.664747e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.665966e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.665966e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.162519 sec +INFO: No Floating Point Exceptions have been reported + 21,588,585,412 cycles:u # 3.501 GHz (74.97%) + 3,703,442 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) + 3,063,072,888 stalled-cycles-backend:u # 14.19% backend cycles idle (74.96%) + 78,071,257,559 instructions:u # 3.62 insn per cycle + # 0.04 stalled cycles per insn (74.94%) + 6.169930605 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.626289e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.629595e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.629595e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.529195 sec -INFO: No Floating Point Exceptions have been reported - 13,151,239,745 cycles # 2.901 GHz - 39,558,608,970 instructions # 3.01 insn per cycle - 4.533411053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.451001e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.456090e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.456090e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.016908 sec +INFO: No Floating Point Exceptions have been reported + 10,568,442,816 cycles:u # 3.500 GHz (74.85%) + 452,444 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.94%) + 1,438,678,493 stalled-cycles-backend:u # 13.61% backend cycles idle (75.06%) + 39,407,284,020 instructions:u # 3.73 insn per cycle + # 0.04 stalled cycles per insn (75.10%) + 3.024966897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.338008e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.354821e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.354821e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.973498 sec -INFO: No Floating Point Exceptions have been reported - 5,607,402,462 cycles # 2.836 GHz - 13,823,390,464 instructions # 2.47 insn per cycle - 1.977813759 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.231004e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233582e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233582e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.340250 sec +INFO: No Floating Point Exceptions have been reported + 4,701,384,029 cycles:u # 3.500 GHz (74.92%) + 1,685,243 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.99%) + 414,413,134 stalled-cycles-backend:u # 8.81% backend cycles idle (74.99%) + 13,815,059,162 instructions:u # 2.94 insn per cycle + # 0.03 stalled cycles per insn (74.99%) + 1.348496912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.523267e+03 
) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.545652e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.545652e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.728657 sec -INFO: No Floating Point Exceptions have been reported - 4,913,666,819 cycles # 2.837 GHz - 12,505,073,837 instructions # 2.54 insn per cycle - 1.733007927 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.360564e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.374844e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.374844e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.234846 sec -INFO: No Floating Point Exceptions have been reported - 4,137,413,855 cycles # 1.848 GHz - 6,391,961,816 instructions # 1.54 insn per cycle - 2.239204941 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index b5935c9801..5cfdad968d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:15:38 -DATE: 2024-10-02_23:02:58 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.989124e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.283210e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.283210e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.521005 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,237,566,944 cycles # 2.967 GHz - 3,555,564,718 instructions # 1.59 insn per cycle - 0.813310962 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.222149e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259480e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259480e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.571352 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,639,028,607 cycles:u # 2.908 GHz (75.21%) + 3,355,204 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.89%) + 35,988,900 stalled-cycles-backend:u # 2.20% backend cycles idle (75.26%) + 1,957,806,439 instructions:u # 1.19 insn per cycle + # 0.02 stalled cycles per insn (75.89%) + 0.624643467 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.655915e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.126232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.126232e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.296128 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,826,956,183 cycles # 3.021 GHz - 24,051,339,768 instructions # 2.22 insn per cycle - 3.639963445 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.737936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.807078e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.807078e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 6.879795 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 23,414,630,300 cycles:u # 3.385 GHz (75.07%) + 39,088,022 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.10%) + 1,100,759,510 stalled-cycles-backend:u # 4.70% backend cycles idle (75.11%) + 20,802,131,066 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (75.02%) + 6.955289687 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.953031e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.954015e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.954015e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.407967 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 25,656,461,445 cycles # 3.050 GHz - 78,961,398,849 instructions # 3.08 insn per cycle - 8.412477675 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.667664e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.668909e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.668909e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.159060 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 21,563,379,147 cycles:u # 3.499 GHz (74.95%) + 855,839 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.95%) + 3,176,265,684 stalled-cycles-backend:u # 14.73% backend cycles idle (74.98%) + 78,148,060,715 instructions:u # 3.62 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 6.167497131 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge 
OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.660154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.664629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.664629e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.493797 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,079,088,056 cycles # 2.909 GHz - 39,574,928,422 instructions # 3.03 insn per cycle - 4.498177013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.470630e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.475832e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.475832e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.009493 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,535,457,620 cycles:u # 3.497 GHz (74.99%) + 475,430 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.04%) + 1,392,708,626 stalled-cycles-backend:u # 13.22% backend cycles idle (75.04%) + 39,356,377,208 instructions:u # 3.74 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 3.017223130 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.225316e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.242363e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.242363e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.004442 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,626,816,053 cycles # 2.802 GHz - 13,835,486,332 instructions # 2.46 insn per cycle - 2.009028620 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.243155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.245914e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.245914e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.331271 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,656,550,883 cycles:u # 3.489 GHz (74.86%) + 672,946 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) + 419,855,499 stalled-cycles-backend:u # 9.02% backend cycles idle (74.83%) + 
13,812,073,425 instructions:u # 2.97 insn per cycle + # 0.03 stalled cycles per insn (74.83%) + 1.338874604 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 
OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.559024e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.583873e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.583873e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.726859 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,933,984,591 cycles # 2.851 GHz - 12,515,815,938 instructions # 2.54 insn per cycle - 1.731571167 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.374751e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.389187e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.389187e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.234434 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,161,174,225 cycles # 1.859 GHz - 6,403,903,805 instructions # 1.54 insn per cycle - 2.238967112 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index 8e9f4dbb7f..e0442f707e 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:20:21 -DATE: 2024-10-02_23:13:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.309339e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337150e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.338770e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.192547e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.254131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254280e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.515639 sec -INFO: No 
Floating Point Exceptions have been reported - 2,211,990,760 cycles # 2.964 GHz - 3,494,673,373 instructions # 1.58 insn per cycle - 0.807662245 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +TOTAL : 0.545325 sec +INFO: No Floating Point Exceptions have been reported + 1,666,928,797 cycles:u # 2.985 GHz (74.33%) + 3,209,148 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.59%) + 34,156,836 stalled-cycles-backend:u # 2.05% backend cycles idle (75.58%) + 1,984,746,124 instructions:u # 1.19 insn per cycle + # 0.02 stalled cycles per insn (74.33%) + 0.591706659 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.142294e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.173330e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.174533e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.806735e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 1.814291e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814407e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.131700 sec -INFO: No Floating Point Exceptions have been reported - 10,175,482,357 cycles # 3.002 GHz - 23,150,986,357 instructions # 2.28 insn per cycle - 3.445678001 seconds time elapsed +TOTAL : 6.713214 sec +INFO: No Floating Point Exceptions have been reported + 22,947,093,828 cycles:u # 3.405 GHz (75.02%) + 28,527,633 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) + 1,109,103,473 stalled-cycles-backend:u # 4.83% backend cycles idle (74.95%) + 19,994,352,529 instructions:u # 0.87 insn per cycle + # 0.06 stalled cycles per insn (74.90%) + 6.777028620 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.957758e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.958752e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.958752e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.660565e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.662263e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.662263e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.386452 sec -INFO: No Floating Point Exceptions have been reported - 25,647,894,641 cycles # 3.057 GHz - 78,959,237,985 instructions # 3.08 insn per cycle - 8.390795470 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.176568 sec +INFO: No Floating Point Exceptions have been reported + 21,675,145,023 cycles:u # 3.508 GHz (74.96%) + 1,029,031 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) + 3,346,561,647 stalled-cycles-backend:u # 15.44% backend cycles idle (75.01%) + 78,065,884,281 instructions:u # 3.60 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 6.181564383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.631833e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.635219e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.635219e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.471851e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.476956e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.476956e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.523349 sec -INFO: No Floating Point Exceptions have been reported - 13,074,947,964 cycles # 2.889 GHz - 39,559,504,140 instructions # 3.03 insn per cycle - 4.527544607 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.004842 sec +INFO: No Floating Point Exceptions have been reported + 10,532,277,206 cycles:u # 3.503 GHz (75.00%) + 522,789 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 1,378,827,302 stalled-cycles-backend:u # 13.09% backend cycles idle (74.99%) + 39,375,118,112 instructions:u # 3.74 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 3.008914073 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.398181e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.415106e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.415106e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.238951e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.241547e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241547e+04 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.960799 sec -INFO: No Floating Point Exceptions have been reported - 5,617,485,604 cycles # 2.860 GHz - 13,822,447,933 instructions # 2.46 insn per cycle - 1.965050700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +TOTAL : 1.331496 sec +INFO: No Floating Point Exceptions have been reported + 4,653,892,675 cycles:u # 3.491 GHz (74.81%) + 755,875 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.53%) + 416,266,786 stalled-cycles-backend:u # 8.94% backend cycles idle (74.55%) + 13,838,410,994 instructions:u # 2.97 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 1.335537948 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 
9.596236e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.620000e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.620000e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.716966 sec -INFO: No Floating Point Exceptions have been reported - 4,918,671,268 cycles # 2.859 GHz - 12,502,910,272 instructions # 2.54 insn per cycle - 1.721169261 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.498633e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.512281e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.512281e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.195425 sec -INFO: No Floating Point Exceptions have been reported - 4,134,969,374 cycles # 1.881 GHz - 6,389,980,315 instructions # 1.55 insn per cycle - 2.199787012 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 3af515fdce..73b422fb64 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,86 +1,69 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:18:35 -DATE: 2024-10-02_23:08:10 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.060906e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341479e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.343286e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.519080 sec -INFO: No Floating Point Exceptions have been reported - 2,221,734,414 cycles # 2.960 GHz - 3,514,068,927 instructions # 1.58 insn per cycle - 0.810053031 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.220956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.256456e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 0.562105 sec +INFO: No Floating Point Exceptions have been reported + 1,615,560,035 cycles:u # 2.884 GHz (75.64%) + 3,279,106 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.73%) + 38,318,371 stalled-cycles-backend:u # 2.37% backend cycles idle (75.75%) + 2,007,901,935 instructions:u # 1.24 insn per cycle + # 0.02 stalled cycles per insn (75.48%) + 0.612103578 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.749279e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.174695e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.175895e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.201027 sec -INFO: No Floating Point Exceptions have been reported - 10,427,032,875 cycles # 3.015 GHz - 22,883,454,671 instructions # 2.19 insn per cycle - 3.514669910 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.749778e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.814387e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814503e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 +TOTAL : 6.810345 sec +INFO: No Floating Point Exceptions have been reported + 23,259,531,034 cycles:u # 3.398 GHz (75.09%) + 38,887,134 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.99%) + 1,113,006,856 stalled-cycles-backend:u # 4.79% backend cycles idle (74.95%) + 20,783,979,610 instructions:u # 0.89 insn per cycle + # 0.05 stalled cycles per insn (74.86%) + 6.871836634 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +71,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.951553e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952512e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952512e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.410578 sec -INFO: No Floating Point Exceptions have been reported - 25,641,456,753 cycles # 3.048 GHz - 78,954,490,540 instructions # 3.08 insn per cycle - 8.414704716 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.671337e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.672629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.672629e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.148237 sec +INFO: No Floating Point Exceptions have been reported + 21,524,198,011 cycles:u # 3.500 GHz (75.03%) + 871,853 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.97%) + 3,159,080,202 stalled-cycles-backend:u # 14.68% backend cycles idle (74.97%) + 78,121,639,221 instructions:u # 3.63 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 6.152385882 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 
4744) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +106,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.419759e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.422883e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.422883e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.801765 sec -INFO: No Floating Point Exceptions have been reported - 13,757,257,019 cycles # 2.863 GHz - 39,559,580,410 instructions # 2.88 insn per cycle - 4.806002877 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.477126e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.482243e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.482243e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.002162 sec +INFO: No Floating Point Exceptions have been reported + 10,518,742,993 cycles:u # 3.501 GHz (74.97%) + 436,480 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.97%) + 1,374,111,555 stalled-cycles-backend:u # 13.06% backend cycles idle (74.97%) + 39,405,294,105 instructions:u # 3.75 insn per cycle + # 0.03 stalled cycles per insn (74.97%) + 3.006362595 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +141,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.392232e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.409007e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.409007e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.960333 sec -INFO: No Floating Point Exceptions have been reported - 5,607,404,860 cycles # 2.855 GHz - 13,823,277,017 instructions # 2.47 insn per cycle - 1.964520797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.230963e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233559e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.233559e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.340843 sec +INFO: No Floating Point Exceptions have been reported + 4,682,086,210 cycles:u # 3.486 GHz (74.99%) + 447,842 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 440,082,686 stalled-cycles-backend:u # 9.40% backend cycles idle (74.98%) + 13,804,782,265 instructions:u # 2.95 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 1.344891707 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,76 +176,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.473692e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.495146e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.495146e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.737232 sec -INFO: No Floating Point Exceptions have been reported - 4,913,030,620 cycles # 2.823 GHz - 12,505,111,466 instructions # 2.55 insn per cycle - 1.741396842 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.352701e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.365792e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.365792e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.237312 sec -INFO: No 
Floating Point Exceptions have been reported - 4,145,251,099 cycles # 1.850 GHz - 6,392,502,399 instructions # 1.54 insn per cycle - 2.241587160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 296b845e54..7faa487866 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:27:23 -DATE: 2024-10-02_22:29:15 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.311659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.341543e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.343557e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.530710 sec -INFO: No Floating Point Exceptions have been reported - 2,270,985,914 cycles # 2.965 GHz - 3,517,062,690 instructions # 1.55 insn per cycle - 0.822991293 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.215784e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.273497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273655e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.541809 sec +INFO: No Floating Point Exceptions have been reported + 1,606,817,035 cycles:u # 2.918 GHz (74.91%) + 2,376,384 stalled-cycles-frontend:u # 0.15% frontend cycles idle (76.08%) + 5,463,411 stalled-cycles-backend:u # 0.34% backend cycles idle (76.60%) + 1,982,702,097 instructions:u # 1.23 insn per cycle + # 0.00 stalled cycles per insn (74.44%) + 0.596494995 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.147376e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.178022e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.179287e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.036375 sec -INFO: No Floating Point Exceptions have been reported - 9,886,012,446 cycles # 2.996 GHz - 20,958,419,825 instructions # 2.12 insn per cycle - 3.356479014 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.815220e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821763e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821880e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.026904 sec +INFO: No Floating Point Exceptions have been reported + 20,631,233,989 cycles:u # 3.411 GHz (75.10%) + 3,303,034 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) + 5,924,934 stalled-cycles-backend:u # 0.03% backend cycles idle (74.96%) + 18,496,865,295 instructions:u # 0.90 insn per cycle + # 0.00 stalled cycles per insn (74.89%) + 6.091229642 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158133E-004 -Relative difference = 2.837296512218831e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.941477e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.942438e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.942438e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.454110 sec -INFO: No Floating Point Exceptions have been reported - 25,600,898,635 cycles # 3.027 GHz - 78,700,147,482 instructions # 3.07 insn per cycle - 8.458308380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.675758e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.677001e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.677001e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.137224 sec +INFO: No Floating Point Exceptions have been reported + 21,485,025,252 cycles:u # 3.499 GHz (74.99%) + 855,560 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 2,803,835,780 stalled-cycles-backend:u # 13.05% backend cycles idle (74.99%) + 78,078,147,682 instructions:u # 3.63 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 
6.144495815 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4695) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.685244e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.688800e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.688800e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.456270 sec -INFO: No Floating Point Exceptions have been reported - 13,027,228,689 cycles # 2.921 GHz - 39,448,830,373 instructions # 3.03 insn per cycle - 4.460509331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.474541e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.479633e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.479633e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.003606 sec +INFO: No Floating Point Exceptions have been reported + 10,493,041,055 cycles:u # 3.490 GHz (74.99%) + 458,788 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) + 1,391,022,399 stalled-cycles-backend:u # 13.26% backend cycles idle (74.99%) + 39,388,790,006 instructions:u # 3.75 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 3.010970463 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11940) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.659238e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.673263e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.673263e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.147446 sec -INFO: No Floating Point Exceptions have been reported - 6,105,169,365 cycles # 2.838 GHz - 13,911,506,311 instructions # 2.28 insn per cycle - 2.151814673 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.233223e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.235836e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235836e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.337966 sec +INFO: No Floating Point Exceptions have been reported + 4,669,113,757 cycles:u # 3.482 GHz (74.96%) + 302,720 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) + 559,911,149 stalled-cycles-backend:u # 11.99% backend cycles idle (74.95%) + 13,826,818,213 instructions:u # 2.96 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 1.345263511 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10220) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 +Avg ME (F77/C++) = 6.6266731198157309E-004 +Relative difference = 2.837296636563793e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.414304e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.436030e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.436030e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.748441 sec -INFO: No Floating Point Exceptions have been reported - 4,989,990,459 cycles # 2.848 GHz - 12,602,385,911 instructions # 2.53 insn per cycle - 1.752785329 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 241) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.286007e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.299200e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.299200e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.257195 sec -INFO: No Floating 
Point Exceptions have been reported - 4,157,035,910 cycles # 1.839 GHz - 6,500,123,841 instructions # 1.56 insn per cycle - 2.261537219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1754) (512y: 193) (512z: 9382) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157320E-004 -Relative difference = 2.837296634927675e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index b2e3af3136..bead9bc4fd 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:05:55 -DATE: 2024-10-02_22:53:31 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] 
+Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.100239e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.122259e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.123671e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.538955 sec -INFO: No Floating Point Exceptions have been reported - 2,284,263,136 cycles # 2.966 GHz - 3,551,683,146 instructions # 1.55 insn per cycle - 0.827784044 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.204581e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.259080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.259232e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.544267 sec +INFO: No Floating Point Exceptions have been reported + 1,594,544,095 cycles:u # 2.890 GHz (75.38%) + 2,297,817 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.34%) + 6,864,918 stalled-cycles-backend:u # 0.43% backend cycles idle (75.46%) + 2,035,542,593 instructions:u # 1.28 insn per cycle + # 0.00 stalled cycles per insn (75.47%) + 0.596819706 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.754763e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.780247e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.781287e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.308444 sec -INFO: No Floating Point Exceptions have been reported - 10,753,673,387 cycles # 3.016 GHz - 22,598,773,039 instructions # 2.10 insn per cycle - 3.621798315 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.807801e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.813996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.814112e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.039166 sec +INFO: No Floating Point Exceptions have been reported + 20,672,886,525 cycles:u # 3.409 GHz (74.94%) + 3,556,848 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.08%) + 6,584,175 stalled-cycles-backend:u # 0.03% backend cycles idle (75.14%) + 18,393,856,750 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.12%) + 6.108540721 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.447762e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.448268e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.448268e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 36.881084 sec -INFO: No Floating Point Exceptions have been reported - 112,229,307,455 cycles # 3.043 GHz - 144,790,435,802 instructions # 1.29 insn per cycle - 36.885388068 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.642056e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.642427e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.642427e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 35.337688 sec +INFO: No Floating Point Exceptions have been reported + 123,786,426,168 cycles:u # 3.503 GHz (74.99%) + 32,182,693 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) + 11,797,922,825 stalled-cycles-backend:u # 9.53% backend cycles idle (75.01%) + 141,197,682,575 instructions:u # 1.14 insn per cycle + # 0.08 stalled cycles per insn (75.00%) + 
35.345151666 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.213545e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.216099e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.216099e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.109796 sec -INFO: No Floating Point Exceptions have been reported - 14,729,625,754 cycles # 2.881 GHz - 37,604,791,196 instructions # 2.55 insn per cycle - 5.114120613 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68172) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.625433e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.627671e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.627671e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.532024 sec +INFO: No Floating Point Exceptions have been reported + 15,876,135,975 cycles:u # 3.501 GHz (74.95%) + 4,561,261 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) + 6,682,795,030 stalled-cycles-backend:u # 42.09% backend cycles idle (74.95%) + 37,517,219,456 instructions:u # 2.36 insn per cycle + # 0.18 stalled cycles per insn (74.96%) + 4.539242499 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:68150) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.692100e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.706833e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.706833e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.138452 sec -INFO: No Floating Point Exceptions have been reported - 6,118,049,713 cycles # 2.856 GHz - 13,052,938,667 instructions # 2.13 insn per cycle - 2.142728323 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46946) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.516587e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.526357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.526357e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.189932 sec +INFO: No Floating Point Exceptions have been reported + 7,653,291,510 cycles:u # 3.490 GHz (74.83%) + 433,752 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) + 4,407,512,563 stalled-cycles-backend:u # 57.59% backend cycles idle (74.91%) + 12,913,139,300 instructions:u # 1.69 insn per cycle + # 0.34 stalled cycles per insn (75.09%) + 2.197244436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46482) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.248664e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.270457e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.270457e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.779918 sec -INFO: No Floating Point Exceptions have been reported - 5,070,510,804 cycles # 2.845 GHz - 11,451,450,406 instructions # 2.26 insn per cycle - 1.784180525 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40486) (512y: 285) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.770608e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.785711e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.785711e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.116802 sec -INFO: No Floating 
Point Exceptions have been reported - 3,955,046,373 cycles # 1.865 GHz - 5,927,215,305 instructions # 1.50 insn per cycle - 2.121083388 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39338) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 567d9226df..6d4b979ef0 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:06:53 -DATE: 2024-10-02_22:54:38 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] 
+Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.114232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.137301e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.138948e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536968 sec -INFO: No Floating Point Exceptions have been reported - 2,275,180,937 cycles # 2.958 GHz - 3,539,221,489 instructions # 1.56 insn per cycle - 0.826289591 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.217021e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.273344e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.273498e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.543903 sec +INFO: No Floating Point Exceptions have been reported + 1,612,633,435 cycles:u # 2.918 GHz (75.12%) + 2,569,790 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.51%) + 8,142,655 stalled-cycles-backend:u # 0.50% backend cycles idle (75.60%) + 2,080,630,465 instructions:u # 1.29 insn per cycle + # 0.00 stalled cycles per insn (75.23%) + 0.592397245 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.750926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.776588e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777633e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.299647 sec -INFO: No Floating Point Exceptions have been reported - 10,717,601,484 cycles # 3.014 GHz - 24,394,837,994 instructions # 2.28 insn per cycle - 3.614900556 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.815395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821733e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821850e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.032526 sec +INFO: No Floating Point Exceptions have been reported + 20,680,767,800 cycles:u # 3.411 GHz (74.98%) + 3,381,725 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) + 7,930,374 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) + 18,435,488,785 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.04%) + 6.145204538 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158122E-004 -Relative difference = 2.837296513854949e-07 +Avg ME (F77/GPU) = 6.6266731198158101E-004 +Relative difference = 2.837296517127185e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.368481e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.368956e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.368956e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.549568 sec -INFO: No Floating Point Exceptions have been reported - 113,756,177,543 cycles # 3.029 GHz - 144,279,233,748 instructions # 1.27 insn per cycle - 37.553893626 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.607078e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.607445e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.607445e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 35.605652 sec +INFO: No Floating Point Exceptions have been reported + 124,829,217,868 cycles:u # 3.506 GHz (75.00%) + 79,483,257 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 10,477,235,146 stalled-cycles-backend:u # 8.39% backend cycles idle (75.00%) + 140,886,082,991 instructions:u # 1.13 insn per cycle + # 0.07 stalled cycles per insn (75.00%) + 
35.616217715 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21174) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140450E-004 -Relative difference = 2.83729918072716e-07 +Avg ME (F77/C++) = 6.6266731198140482E-004 +Relative difference = 2.8372991758188064e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.101360e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.103709e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.103709e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.293950 sec -INFO: No Floating Point Exceptions have been reported - 15,276,793,173 cycles # 2.885 GHz - 37,839,533,934 instructions # 2.48 insn per cycle - 5.298219477 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68594) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.559493e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.561660e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.561660e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 4.615733 sec +INFO: No Floating Point Exceptions have been reported + 16,129,890,206 cycles:u # 3.493 GHz (74.91%) + 3,026,043 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) + 6,217,990,465 stalled-cycles-backend:u # 38.55% backend cycles idle (75.06%) + 37,497,496,126 instructions:u # 2.32 insn per cycle + # 0.17 stalled cycles per insn 
(75.06%) + 4.622951464 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141209E-004 -Relative difference = 2.8372990661989057e-07 +Avg ME (F77/C++) = 6.6266731198141220E-004 +Relative difference = 2.837299064562788e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.769981e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.784911e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.784911e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.116737 sec -INFO: No Floating Point Exceptions have been reported - 5,996,887,243 cycles # 2.829 GHz - 12,920,986,626 instructions # 2.15 insn per cycle - 2.120808857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46048) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.688066e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.698173e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.698173e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.141376 sec +INFO: No Floating Point Exceptions have been reported + 7,490,478,364 cycles:u # 3.493 GHz (75.01%) + 398,988 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 4,139,339,201 stalled-cycles-backend:u # 55.26% backend cycles idle (75.01%) + 12,775,314,953 instructions:u # 1.71 insn per cycle + # 0.32 stalled cycles per insn (75.01%) + 
2.148542351 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45597) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 +Avg ME (F77/C++) = 6.6266731198156778E-004 +Relative difference = 2.837296716733571e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.205151e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.226957e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.226957e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.787893 sec -INFO: No Floating Point Exceptions have been reported - 5,091,257,021 cycles # 2.842 GHz - 11,450,857,319 instructions # 2.25 insn per cycle - 1.792163037 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40151) (512y: 219) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.725567e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.740384e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.740384e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.129337 sec -INFO: No Floating 
Point Exceptions have been reported - 3,958,012,203 cycles # 1.856 GHz - 5,893,673,725 instructions # 1.49 insn per cycle - 2.133623159 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38977) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156789E-004 -Relative difference = 2.837296715097453e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 5d514798b3..5808decd6f 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:27:45 -DATE: 2024-10-02_22:29:49 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.483751e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.526267e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.530499e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.494153 sec -INFO: No Floating Point Exceptions have been reported - 2,103,124,807 cycles # 2.954 GHz - 3,121,712,472 instructions # 1.48 insn per cycle - 0.773554314 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.013165e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.166222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.166578e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.488146 sec +INFO: No Floating Point Exceptions have been reported + 1,415,555,543 cycles:u # 2.855 GHz (75.38%) + 2,516,691 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.39%) + 5,607,343 stalled-cycles-backend:u # 0.40% backend cycles idle (76.18%) + 1,846,857,501 instructions:u # 1.30 insn per cycle + # 0.00 stalled cycles per insn (75.91%) + 0.543476502 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.160066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.222867e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.225655e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.790893 sec -INFO: No Floating Point Exceptions have been reported - 6,074,189,476 cycles # 2.980 GHz - 12,927,595,973 instructions # 2.13 insn per cycle - 2.094579269 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.941879e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.965090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.965379e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.641309 sec +INFO: No Floating Point Exceptions have been reported + 12,398,470,985 cycles:u # 3.390 GHz (74.72%) + 2,675,203 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.88%) + 12,468,924 stalled-cycles-backend:u # 0.10% backend cycles idle (74.85%) + 11,363,812,580 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (75.09%) + 3.700528617 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.991600e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.992621e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.992621e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.239956 sec -INFO: No Floating Point Exceptions have been reported - 24,920,798,039 cycles # 3.024 GHz - 79,109,177,964 instructions # 3.17 insn per cycle - 8.244226962 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.747827e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.749091e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.749091e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 5.974614 sec +INFO: No Floating Point Exceptions have been reported + 20,938,735,525 cycles:u # 3.503 GHz (74.99%) + 1,421,346 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) + 2,789,809,039 stalled-cycles-backend:u # 13.32% backend cycles idle (74.97%) + 78,052,866,435 instructions:u # 3.73 insn per cycle + # 0.04 stalled cycles per insn (74.97%) + 
5.982004691 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.256911e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.270142e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.270142e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.264792 sec -INFO: No Floating Point Exceptions have been reported - 6,533,363,065 cycles # 2.880 GHz - 20,270,541,393 instructions # 3.10 insn per cycle - 2.268973901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.090926e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.092983e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.092983e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.509761 sec +INFO: No Floating Point Exceptions have been reported + 5,288,936,507 cycles:u # 3.497 GHz (74.69%) + 217,362 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.83%) + 697,270,102 stalled-cycles-backend:u # 13.18% backend cycles idle (75.08%) + 20,304,183,045 instructions:u # 3.84 insn per cycle + # 0.03 stalled cycles per insn (75.14%) + 
1.516786989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.646998e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.654072e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.654072e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.000797 sec -INFO: No Floating Point Exceptions have been reported - 2,839,215,106 cycles # 2.827 GHz - 7,065,941,238 instructions # 2.49 insn per cycle - 1.004916383 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.410807e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.421078e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.421078e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.686476 sec +INFO: No Floating Point Exceptions have been reported + 2,407,278,996 cycles:u # 3.493 GHz (74.57%) + 915,950 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.47%) + 263,753,591 stalled-cycles-backend:u # 10.96% backend cycles idle (74.37%) + 7,042,386,809 instructions:u # 2.93 insn per cycle + # 0.04 stalled cycles per insn (74.95%) + 
0.693448187 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.869083e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877796e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877796e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.882438 sec -INFO: No Floating Point Exceptions have been reported - 2,527,237,536 cycles # 2.853 GHz - 6,403,613,133 instructions # 2.53 insn per cycle - 0.886591858 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.495984e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.501538e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.501538e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.101478 sec -INFO: No Floating 
Point Exceptions have been reported - 2,074,107,629 cycles # 1.877 GHz - 3,304,393,311 instructions # 1.59 insn per cycle - 1.105808487 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 2dfc41840b..210503fe64 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:16:01 -DATE: 2024-10-02_23:03:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.941350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.461692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.461692e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.477533 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,051,019,219 cycles # 2.960 GHz - 3,077,913,039 instructions # 1.50 insn per cycle - 0.750579271 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.048713e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.154939e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.154939e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 +TOTAL : 0.494255 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,484,258,659 cycles:u # 2.920 GHz (74.03%) + 3,740,150 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.79%) + 37,553,445 stalled-cycles-backend:u # 2.53% backend cycles idle (76.42%) + 1,853,356,007 instructions:u # 1.25 insn per cycle + # 0.02 stalled cycles per insn (75.38%) + 0.544261251 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.966568e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.089944e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.089944e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.964323 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,640,871,467 cycles # 3.008 GHz - 14,013,929,876 instructions # 2.11 insn per cycle - 2.263846286 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.649670e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.949091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.949091e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 4.451863 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,013,669,983 cycles:u # 3.353 GHz (75.18%) + 39,259,352 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.10%) + 1,108,164,012 stalled-cycles-backend:u # 7.38% backend cycles idle (74.99%) + 13,601,634,469 instructions:u # 0.91 insn per cycle + # 0.08 stalled cycles per insn (74.86%) + 4.516134199 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.003416e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.004461e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.004461e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.193798 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 24,914,156,131 cycles # 3.040 GHz - 79,113,283,238 instructions # 3.18 insn per cycle - 8.198127255 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.739855e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.741118e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.741118e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 5.994162 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 21,020,909,991 cycles:u # 3.505 GHz (74.84%) + 7,289,869 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.01%) + 2,786,603,340 stalled-cycles-backend:u # 13.26% backend cycles idle (75.05%) + 78,042,375,969 instructions:u # 3.71 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 6.001397483 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2043) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.268604e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.282277e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.282277e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.263945 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,560,498,165 cycles # 2.893 GHz - 20,280,423,064 instructions # 3.09 insn per cycle - 2.268263136 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.089785e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.091828e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.091828e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.513689 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,299,089,664 cycles:u # 3.494 GHz (74.69%) + 410,422 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.84%) + 720,273,200 stalled-cycles-backend:u # 13.59% backend cycles idle (75.11%) + 
20,303,253,905 instructions:u # 3.83 insn per cycle + # 0.04 stalled cycles per insn (75.21%) + 1.521153254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative 
difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.650562e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.657776e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.657776e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.001327 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,847,194,781 cycles # 2.833 GHz - 7,076,285,592 instructions # 2.49 insn per cycle - 1.005550089 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.422716e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433093e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433093e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.685519 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,400,647,554 cycles:u # 3.487 GHz (74.51%) + 764,490 
stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.45%) + 226,385,909 stalled-cycles-backend:u # 9.43% backend cycles idle (74.53%) + 7,042,751,685 instructions:u # 2.93 insn per cycle + # 0.03 stalled cycles per insn (75.09%) + 0.692625436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.886394e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.895503e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.895503e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.876996 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,532,131,583 cycles # 2.875 GHz - 6,413,285,430 instructions # 2.53 insn per cycle - 0.881306742 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.496106e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.501711e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.501711e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.104249 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,079,471,281 cycles # 1.877 GHz - 3,314,022,575 instructions # 1.59 insn per cycle - 1.108641897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index f59a43ef84..30c3c51f0d 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:20:44 -DATE: 2024-10-02_23:14:13 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.506269e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.548412e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.552269e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.473972 sec -INFO: No Floating Point Exceptions have been reported - 2,046,977,318 cycles # 2.972 GHz - 3,047,751,198 instructions # 1.49 insn per cycle - 0.746093011 seconds time elapsed 
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.977433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.163711e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164069e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.205840e-01 +- 3.252482e-01 ) GeV^-4 +TOTAL : 0.486131 sec +INFO: No Floating Point Exceptions have been reported + 1,432,838,430 cycles:u # 2.878 GHz (75.92%) + 2,865,850 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.92%) + 33,010,647 stalled-cycles-backend:u # 2.30% backend cycles idle (75.92%) + 1,834,730,819 instructions:u # 1.28 insn per cycle + # 0.02 stalled cycles per insn (74.42%) + 0.532007069 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.132349e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.194879e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.197694e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.875001 sec -INFO: No Floating Point Exceptions have been reported - 6,377,015,026 cycles # 3.014 GHz - 13,456,664,964 instructions # 2.11 insn per cycle - 2.175037071 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.941273e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.966657e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.966945e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183835e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 4.316072 sec +INFO: No Floating Point Exceptions have been reported + 14,634,879,974 cycles:u # 3.375 GHz (75.17%) + 28,023,828 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.06%) + 1,080,177,933 stalled-cycles-backend:u # 7.38% backend cycles idle (74.92%) + 12,802,878,043 instructions:u # 0.87 insn per cycle + # 0.08 stalled cycles per insn (74.86%) + 4.373208437 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.008641e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.009653e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.009653e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.170736 sec -INFO: No Floating Point Exceptions have been reported - 24,919,535,959 cycles # 3.049 GHz - 79,107,568,196 instructions # 3.17 insn per cycle - 8.174687518 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.743804e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.745174e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.745174e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 5.984050 sec +INFO: No Floating Point Exceptions have been reported + 20,959,056,605 cycles:u # 3.501 GHz (75.01%) + 7,048,916 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.01%) + 2,759,736,387 stalled-cycles-backend:u # 13.17% backend cycles idle (75.01%) + 78,050,009,240 instructions:u # 3.72 insn per cycle + # 0.04 stalled cycles per insn (75.01%) + 5.987947953 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.228176e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.241678e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.241678e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.274725 sec -INFO: No Floating Point Exceptions have been reported - 6,529,719,760 cycles # 2.866 GHz - 20,269,126,653 instructions # 3.10 insn per cycle - 2.278762144 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.091176e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.093233e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.093233e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.509177 sec +INFO: No Floating Point Exceptions have been reported + 5,293,082,594 cycles:u # 3.503 GHz (74.76%) + 231,968 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) + 702,913,833 stalled-cycles-backend:u # 13.28% backend cycles idle (75.12%) + 20,309,990,457 instructions:u # 3.84 insn per cycle + # 0.03 stalled cycles per insn (75.12%) + 1.512971887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.543967e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.550020e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.550020e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.068855 sec -INFO: No Floating Point Exceptions have been reported - 2,839,565,669 cycles # 2.648 GHz - 7,065,359,777 instructions # 2.49 insn per cycle - 1.073003064 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.212276e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.223478e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.223478e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.749293 sec +INFO: No Floating Point Exceptions have been reported + 2,618,312,640 cycles:u # 3.483 GHz (74.47%) + 481,078 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.49%) + 274,174,540 stalled-cycles-backend:u # 10.47% backend cycles idle (74.89%) + 7,033,766,117 instructions:u # 2.69 insn per cycle + # 0.04 stalled cycles per insn (75.39%) + 0.753840551 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.860425e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.869459e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.869459e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.887472 sec -INFO: No Floating Point Exceptions have been reported - 2,533,693,672 cycles # 2.846 GHz - 6,400,193,071 instructions # 2.53 insn per cycle - 0.891520698 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.480335e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.485766e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.485766e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.114517 sec -INFO: No 
Floating Point Exceptions have been reported - 2,073,817,797 cycles # 1.855 GHz - 3,302,576,002 instructions # 1.59 insn per cycle - 1.118521025 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index d51b50aa19..3f21b859d4 
100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,86 +1,69 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:18:58 -DATE: 2024-10-02_23:08:43 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.026858e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.479959e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.483629e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.478080 sec -INFO: No Floating Point Exceptions have been reported - 2,041,849,266 cycles # 2.949 GHz - 3,029,425,267 instructions # 1.48 insn per cycle - 0.750979183 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst -WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.064587e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.167586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.167925e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 +TOTAL : 0.485000 sec +INFO: No Floating Point Exceptions have been reported + 1,450,564,825 cycles:u # 2.892 GHz (74.89%) + 3,384,624 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.13%) + 32,936,362 stalled-cycles-backend:u # 2.27% backend cycles idle (74.69%) + 1,893,341,305 instructions:u # 1.31 insn per cycle + # 0.02 stalled cycles per insn (75.29%) + 0.531131671 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.176974e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.225245e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.228004e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.893219 sec -INFO: No Floating Point Exceptions have been reported - 6,369,671,972 cycles # 2.999 GHz - 13,805,433,323 instructions # 2.17 insn per cycle - 2.180376348 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.674766e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.963630e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.963917e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 +TOTAL : 4.410855 sec +INFO: No Floating Point Exceptions have been reported + 14,968,323,331 cycles:u # 3.374 GHz (75.17%) + 39,156,540 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.07%) + 1,092,203,690 stalled-cycles-backend:u # 7.30% backend cycles idle (74.77%) + 13,544,332,277 instructions:u # 0.90 insn per cycle + # 0.08 stalled cycles per insn (74.77%) + 4.467521167 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +71,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.002985e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.003965e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.003965e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.192685 sec -INFO: No Floating Point Exceptions have been reported - 24,899,500,908 cycles # 3.038 GHz - 79,109,193,695 instructions # 3.18 insn per cycle - 8.196731570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.737327e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.738674e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.738674e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 5.998189 sec +INFO: No Floating Point Exceptions have been reported + 21,034,970,742 cycles:u # 3.506 GHz (74.94%) + 7,216,384 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) + 2,747,143,202 stalled-cycles-backend:u # 13.06% backend cycles idle (74.99%) + 78,077,897,154 instructions:u # 3.71 insn per cycle + # 0.04 stalled cycles per insn (75.05%) + 6.002159436 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +106,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274863312764526E-004 -Relative difference = 4.998523613136231e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627487e-04 +Avg ME (F77/C++) = 6.6274868816393329E-004 +Relative difference = 1.7859056895059718e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.200812e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.214231e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.214231e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.282348 sec -INFO: No Floating Point Exceptions have been reported - 6,530,583,474 cycles # 2.857 GHz - 20,270,600,320 instructions # 3.10 insn per cycle - 2.286554025 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.082396e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.084464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.084464e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.521644 sec +INFO: No Floating Point Exceptions have been reported + 5,326,757,820 cycles:u # 3.497 GHz (74.82%) + 252,670 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.81%) + 703,654,556 stalled-cycles-backend:u # 13.21% backend cycles idle (74.79%) + 20,327,779,437 instructions:u # 3.82 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 1.525462915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +141,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861442972011E-004 -Relative difference = 2.1772539563413118e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.663107e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.670148e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.670148e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.991015 sec -INFO: No Floating Point Exceptions have been reported - 2,834,464,958 cycles # 2.850 GHz - 7,065,761,630 instructions # 2.49 insn per cycle - 0.995105206 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.425817e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.437263e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.437263e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.682637 sec +INFO: No Floating Point Exceptions have been reported + 2,386,005,165 cycles:u # 3.485 GHz (74.16%) + 292,181 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.74%) + 214,651,421 stalled-cycles-backend:u # 9.00% backend cycles idle (75.46%) + 7,027,158,575 instructions:u # 2.95 insn per cycle + # 0.03 stalled cycles per insn (75.46%) + 0.686409709 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,76 +176,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
--rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.873004e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.881673e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.881673e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.880479 sec -INFO: No Floating Point Exceptions have been reported - 2,525,421,644 cycles # 2.857 GHz - 6,403,279,155 instructions # 2.54 insn per cycle - 0.884506369 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271938174396888E-004 -Relative difference = 2.7547150614455683e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.474559e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.479875e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.479875e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.117237 sec -INFO: No 
Floating Point Exceptions have been reported - 2,067,196,285 cycles # 1.845 GHz - 3,303,704,117 instructions # 1.60 insn per cycle - 1.121426905 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952779718007E-004 -Relative difference = 4.194411063934945e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index e59a4c7649..e26dda0aaa 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:28:03 -DATE: 2024-10-02_22:30:15 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.512381e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.556061e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.560063e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.492452 sec -INFO: No Floating Point Exceptions have been reported - 2,099,626,604 cycles # 2.948 GHz - 3,069,125,723 instructions # 1.46 insn per cycle - 0.769337960 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.007232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155231e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155588e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.478018 sec +INFO: No Floating Point Exceptions have been reported + 1,407,702,810 cycles:u # 2.886 GHz (75.49%) + 2,592,505 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.44%) + 10,637,742 stalled-cycles-backend:u # 0.76% backend cycles idle (73.52%) + 1,813,846,392 instructions:u # 1.29 insn per cycle + # 0.01 stalled cycles per insn (74.27%) + 0.532596162 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.132307e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.195668e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.198555e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.801389 sec -INFO: No Floating Point Exceptions have been reported - 6,087,353,843 cycles # 2.992 GHz - 12,902,099,211 instructions # 2.12 insn per cycle - 2.093261081 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.016489e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.041051e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.041354e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.547661 sec +INFO: No Floating Point Exceptions have been reported + 12,037,965,362 cycles:u # 3.379 GHz (75.08%) + 2,929,122 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) + 6,308,121 stalled-cycles-backend:u # 0.05% backend cycles idle (74.92%) + 11,034,214,209 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 3.607477945 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262667672387088E-004 -Relative difference = 2.825534762507892e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.002964e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.003993e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.003993e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.192835 sec -INFO: No Floating Point Exceptions have been reported - 24,924,243,070 cycles # 3.041 GHz - 78,847,605,592 instructions # 3.16 insn per cycle - 8.196950693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.734133e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.735456e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.735456e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 6.004874 sec +INFO: No Floating Point Exceptions have been reported + 21,034,046,883 cycles:u # 3.501 GHz (74.99%) + 1,246,412 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) + 2,757,000,098 stalled-cycles-backend:u # 13.11% backend cycles idle (74.97%) + 78,049,772,360 instructions:u # 3.71 insn per cycle + # 0.04 stalled cycles per insn (74.97%) + 
6.012706081 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274866250177339E-004 -Relative difference = 5.65798569465384e-08 +Avg ME (F77/C++) = 6.6274868874222764E-004 +Relative difference = 1.698648731198014e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.423205e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.437587e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.437587e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.213992 sec -INFO: No Floating Point Exceptions have been reported - 6,479,488,334 cycles # 2.922 GHz - 20,229,540,572 instructions # 3.12 insn per cycle - 2.218146120 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.086370e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088395e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088395e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 1.515750 sec +INFO: No Floating Point Exceptions have been reported + 5,306,409,578 cycles:u # 3.494 GHz (74.72%) + 222,743 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.76%) + 816,077,120 stalled-cycles-backend:u # 15.38% backend cycles idle (75.00%) + 20,308,592,291 instructions:u # 3.83 insn per cycle + # 0.04 stalled cycles per insn (75.23%) + 
1.522939793 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274861448331612E-004 -Relative difference = 2.1853408865157068e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627485e-04 +Avg ME (F77/C++) = 6.6274847398845038E-004 +Relative difference = 3.924799464139408e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.565281e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.571362e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.571362e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.052426 sec -INFO: No Floating Point Exceptions have been reported - 2,984,858,604 cycles # 2.826 GHz - 7,206,634,684 instructions # 2.41 insn per cycle - 1.056645042 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.426307e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.436673e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.436673e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 0.682233 sec +INFO: No Floating Point Exceptions have been reported + 2,390,357,790 cycles:u # 3.489 GHz (74.41%) + 182,998 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.25%) + 251,809,756 stalled-cycles-backend:u # 10.53% backend cycles idle (74.83%) + 7,021,160,599 instructions:u # 2.94 insn per cycle + # 0.04 stalled cycles per insn (75.48%) + 
0.689173465 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10773) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271946993158581E-004 +Relative difference = 4.537125319208525e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.812875e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821466e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821466e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.909433 sec -INFO: No Floating Point Exceptions have been reported - 2,611,310,870 cycles # 2.860 GHz - 6,544,588,321 instructions # 2.51 insn per cycle - 0.913642429 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 27) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627194e-04 -Avg ME (F77/C++) = 6.6271939668088170E-004 -Relative difference = 5.008331292535666e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.437201e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.442373e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.442373e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.146158 sec -INFO: No Floating 
Point Exceptions have been reported - 2,140,140,974 cycles # 1.862 GHz - 3,461,558,427 instructions # 1.62 insn per cycle - 1.150379984 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271952032316561E-004 -Relative difference = 3.066631594207157e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index 59d4d1fb5f..f436c07646 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:07:52 -DATE: 2024-10-02_22:55:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.562021e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.605671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.609619e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.491571 sec -INFO: No Floating Point Exceptions have been reported - 2,109,215,463 cycles # 2.972 GHz - 3,151,172,679 instructions # 1.49 insn per cycle - 0.768602284 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.000948e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.160689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.161056e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.486809 sec +INFO: No Floating Point Exceptions have been reported + 1,377,180,607 cycles:u # 2.793 GHz (75.77%) + 2,398,789 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.40%) + 11,764,538 stalled-cycles-backend:u # 0.85% backend cycles idle (73.74%) + 1,890,847,506 instructions:u # 1.37 insn per cycle + # 0.01 stalled cycles per insn (73.97%) + 0.540106625 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.602270e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.673827e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.676735e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.733623 sec -INFO: No Floating Point Exceptions have been reported - 5,929,772,785 cycles # 3.016 GHz - 12,569,897,546 instructions # 2.12 insn per cycle - 2.025144690 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.942513e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.966205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.966496e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.639339 sec +INFO: No Floating Point Exceptions have been reported + 12,378,328,092 cycles:u # 3.386 GHz (74.81%) + 2,874,435 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.81%) + 5,888,561 stalled-cycles-backend:u # 0.05% backend cycles idle (74.93%) + 11,359,397,040 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 3.695933493 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.758295e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.759107e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.759107e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.486353 sec -INFO: No Floating Point Exceptions have been reported - 86,270,016,297 cycles # 3.028 GHz - 135,669,129,169 instructions # 1.57 insn per cycle - 28.490480934 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.090078e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.090698e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.090698e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 26.936506 sec +INFO: No Floating Point Exceptions have been reported + 94,349,632,400 cycles:u # 3.502 GHz (75.00%) + 321,547,192 stalled-cycles-frontend:u # 0.34% frontend cycles idle (75.00%) + 6,053,477,678 stalled-cycles-backend:u # 6.42% backend cycles idle (75.00%) + 132,416,937,199 instructions:u # 1.40 insn per cycle + # 0.05 stalled cycles per insn (75.00%) + 
26.943698382 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:17007) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349717465765E-004 -Relative difference = 4.26303654465793e-09 +Avg ME (F77/C++) = 6.6275346655336742E-004 +Relative difference = 5.0466172741879477e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.086977e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.099732e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.099732e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.319304 sec -INFO: No Floating Point Exceptions have been reported - 6,773,827,971 cycles # 2.917 GHz - 19,353,970,780 instructions # 2.86 insn per cycle - 2.323538739 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.852540e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.863192e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.863192e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 2.094908 sec +INFO: No Floating Point Exceptions have been reported + 7,335,187,413 cycles:u # 3.497 GHz (74.84%) + 369,790 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) + 3,243,303,148 stalled-cycles-backend:u # 44.22% backend cycles idle (74.86%) + 19,156,715,937 instructions:u # 2.61 insn per cycle + # 0.17 stalled cycles per insn (75.04%) 
+ 2.102051483 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69115) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862748188362E-004 -Relative difference = 4.14665283800746e-08 +Avg ME (F77/C++) = 6.6274857190509046E-004 +Relative difference = 4.239150340994169e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.397177e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.402070e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.402070e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.178879 sec -INFO: No Floating Point Exceptions have been reported - 3,378,583,289 cycles # 2.858 GHz - 6,795,240,952 instructions # 2.01 insn per cycle - 1.183020517 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49034) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.474649e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.478463e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.478463e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.118789 sec +INFO: No Floating Point Exceptions have been reported + 3,928,092,158 cycles:u # 3.502 GHz (74.85%) + 285,742 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) + 2,218,678,549 stalled-cycles-backend:u # 56.48% backend cycles idle (75.04%) + 6,698,630,896 instructions:u # 1.71 insn per cycle + # 0.33 stalled cycles per insn (75.04%) + 
1.125975009 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735727803539E-004 +Relative difference = 6.446385744398604e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.787992e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.796171e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.796171e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.922168 sec -INFO: No Floating Point Exceptions have been reported - 2,625,296,482 cycles # 2.836 GHz - 5,970,027,658 instructions # 2.27 insn per cycle - 0.926290404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42602) (512y: 11) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731568543797E-004 -Relative difference = 2.3668012430631962e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.494711e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.500327e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.500327e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.102442 sec -INFO: No Floating 
Point Exceptions have been reported - 2,067,516,500 cycles # 1.870 GHz - 3,494,858,338 instructions # 1.69 insn per cycle - 1.106623225 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5208) (512y: 3) (512z:44858) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750237027223E-004 -Relative difference = 3.5765412974815996e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index f2c87a7ab9..9afc98038f 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_11:08:34 -DATE: 2024-10-02_22:56:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] 
+Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.595159e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.631816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.635791e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493140 sec -INFO: No Floating Point Exceptions have been reported - 2,108,192,087 cycles # 2.971 GHz - 3,117,683,956 instructions # 1.48 insn per cycle - 0.768416097 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.013033e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.163858e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.164212e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 +TOTAL : 0.480319 sec +INFO: No Floating Point Exceptions have been reported + 1,415,656,442 cycles:u # 2.887 GHz (75.48%) + 2,488,617 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.52%) + 6,001,546 stalled-cycles-backend:u # 0.42% backend cycles idle (73.61%) + 1,830,372,309 instructions:u # 1.29 insn per cycle + # 0.00 stalled cycles per insn (74.67%) + 0.528836597 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.676536e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.747415e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.750543e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.730862 sec -INFO: No Floating Point Exceptions have been reported - 5,933,863,280 cycles # 3.005 GHz - 11,799,586,376 instructions # 1.99 insn per cycle - 2.031002433 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.005347e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.032091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.032389e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 +TOTAL : 3.550404 sec +INFO: No Floating Point Exceptions have been reported + 12,052,442,667 cycles:u # 3.378 GHz (74.95%) + 2,882,292 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) + 6,350,179 stalled-cycles-backend:u # 0.05% backend cycles idle (75.01%) + 11,059,578,171 instructions:u # 0.92 insn per cycle + # 0.00 stalled cycles per insn (75.13%) + 3.609767733 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 6.626454e-04 -Avg ME (F77/GPU) = 6.6262669162351490E-004 -Relative difference = 2.8232862531213374e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 6.626836e-04 +Avg ME (F77/GPU) = 6.6271025603446138E-004 +Relative difference = 4.022437625032909e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe 
-========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.806823e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.807635e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.807635e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.247116 sec -INFO: No Floating Point Exceptions have been reported - 85,893,515,248 cycles # 3.041 GHz - 135,352,063,458 instructions # 1.58 insn per cycle - 28.251186288 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.922308e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.922894e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.922894e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 +TOTAL : 27.698517 sec +INFO: No Floating Point Exceptions have been reported + 97,024,902,068 cycles:u # 3.503 GHz (74.99%) + 131,608,514 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.99%) + 5,804,063,067 stalled-cycles-backend:u # 5.98% backend cycles idle (74.99%) + 131,693,986,054 instructions:u # 1.36 insn per cycle + # 0.04 stalled cycles per insn (75.00%) + 
27.705741729 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:16664) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275349662128086E-004 -Relative difference = 5.098002770919431e-09 +Avg ME (F77/C++) = 6.6275348053303901E-004 +Relative difference = 2.9372852846917734e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.048812e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.061380e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.061380e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.331526 sec -INFO: No Floating Point Exceptions have been reported - 6,855,274,765 cycles # 2.936 GHz - 19,472,640,725 instructions # 2.84 insn per cycle - 2.335711915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.243608e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.255433e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.255433e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 +TOTAL : 1.996148 sec +INFO: No Floating Point Exceptions have been reported + 6,993,731,379 cycles:u # 3.499 GHz (74.89%) + 958,688 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.79%) + 2,969,879,973 stalled-cycles-backend:u # 42.46% backend cycles idle (74.75%) + 19,159,605,029 instructions:u # 2.74 insn per cycle + # 0.16 stalled cycles per insn (74.95%) 
+ 2.003319478 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:68769) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274862799683282E-004 -Relative difference = 4.2243518621014775e-08 +Avg ME (F77/C++) = 6.6274857155746575E-004 +Relative difference = 4.291602312495571e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.463700e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.469145e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.469145e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.125411 sec -INFO: No Floating Point Exceptions have been reported - 3,100,011,361 cycles # 2.746 GHz - 6,715,084,131 instructions # 2.17 insn per cycle - 1.129564678 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47692) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.443883e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.447559e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.447559e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 +TOTAL : 1.142259 sec +INFO: No Floating Point Exceptions have been reported + 3,998,873,840 cycles:u # 3.492 GHz (74.86%) + 52,085,002 stalled-cycles-frontend:u # 1.30% frontend cycles idle (74.85%) + 2,183,378,746 stalled-cycles-backend:u # 54.60% backend cycles idle (74.85%) + 6,643,476,000 instructions:u # 1.66 insn per cycle + # 0.33 stalled cycles per insn (74.85%) + 
1.149023989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47334) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627274e-04 +Avg ME (F77/C++) = 6.6272735712090414E-004 +Relative difference = 6.470095531024898e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.701785e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.709182e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.709182e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.968805 sec -INFO: No Floating Point Exceptions have been reported - 2,625,966,040 cycles # 2.701 GHz - 5,966,391,975 instructions # 2.27 insn per cycle - 0.972890407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41858) (512y: 13) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627273e-04 -Avg ME (F77/C++) = 6.6272731623419345E-004 -Relative difference = 2.449603850635964e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.484080e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.489679e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.489679e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.110163 sec -INFO: No Floating 
Point Exceptions have been reported - 2,071,498,058 cycles # 1.861 GHz - 3,487,792,468 instructions # 1.68 insn per cycle - 1.114282581 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44494) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627275e-04 -Avg ME (F77/C++) = 6.6272750247886592E-004 -Relative difference = 3.740400032174438e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 97e6470827..33cd2d7259 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:28:20 -DATE: 2024-10-02_22:30:41 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.316539e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.346233e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.348408e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533376 sec -INFO: No Floating Point Exceptions have been reported - 2,273,630,859 cycles # 2.959 GHz - 3,530,304,224 instructions # 1.55 insn per cycle - 0.826605443 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.195655e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.256437e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.582310 sec +INFO: No Floating Point Exceptions have been reported + 1,548,799,561 cycles:u # 2.707 GHz (75.20%) + 2,635,387 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.15%) + 7,844,789 stalled-cycles-backend:u # 0.51% backend cycles idle (76.11%) + 2,004,827,952 instructions:u # 1.29 insn per cycle + # 0.00 stalled cycles per insn (74.67%) + 0.637286518 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.119929e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.150275e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.151562e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.050268 sec -INFO: No Floating Point Exceptions have been reported - 9,709,254,510 cycles # 2.935 GHz - 13,370,261,279 instructions # 1.38 insn per cycle - 3.367751590 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.797960e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.804166e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804281e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.074389 sec +INFO: No Floating Point Exceptions have been reported + 19,351,372,698 cycles:u # 3.173 GHz (75.09%) + 3,194,528 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.99%) + 5,306,036 stalled-cycles-backend:u # 0.03% backend cycles idle (74.98%) + 17,348,901,637 instructions:u # 0.90 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 6.141023183 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.915345e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.916261e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.916261e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.569018 sec -INFO: No Floating Point Exceptions have been reported - 25,934,368,405 cycles # 3.026 GHz - 79,430,143,870 instructions # 3.06 insn per cycle - 8.573244716 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.561544e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.562742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.562742e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.411011 sec +INFO: No Floating Point Exceptions have been reported + 21,808,026,475 cycles:u # 3.400 GHz (74.92%) + 1,498,697 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) + 2,731,850,634 stalled-cycles-backend:u # 12.53% backend cycles idle (75.05%) + 78,797,940,588 instructions:u # 3.61 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 6.418516328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4817) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.634190e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.637434e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.637434e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.519341 sec -INFO: No Floating Point Exceptions have been reported - 12,845,450,280 cycles # 2.841 GHz - 38,825,374,620 instructions # 3.02 insn per cycle - 4.523658769 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.541327e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.546601e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.546601e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.967715 sec +INFO: No Floating Point Exceptions have been reported + 10,392,967,476 cycles:u # 3.499 GHz (74.96%) + 3,206,515 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.96%) + 1,346,372,943 stalled-cycles-backend:u # 12.95% backend cycles idle (74.96%) + 38,655,309,883 instructions:u # 3.72 insn per cycle + # 0.03 stalled cycles per insn (74.96%) + 2.974858471 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12020) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.419852e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.436995e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.436995e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.954378 sec -INFO: No Floating Point Exceptions have been reported - 5,613,587,439 cycles # 2.867 GHz - 13,617,535,847 instructions # 2.43 insn per cycle - 1.958653443 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.223037e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.225584e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.225584e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.349009 sec +INFO: No Floating Point Exceptions have been reported + 4,726,511,869 cycles:u # 3.496 GHz (74.83%) + 2,315,884 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.15%) + 463,423,377 stalled-cycles-backend:u # 9.80% backend cycles idle (75.15%) + 13,596,968,035 instructions:u # 2.88 insn per cycle + # 0.03 stalled cycles per insn (75.15%) + 1.356861285 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10261) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.634198e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.657060e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.657060e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.708774 sec -INFO: No Floating Point Exceptions have been reported - 4,864,533,016 cycles # 2.841 GHz - 12,296,957,793 instructions # 2.53 insn per cycle - 1.713075276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 80) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.360180e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.374428e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.374428e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.234665 sec -INFO: No Floating 
Point Exceptions have been reported - 4,169,044,558 cycles # 1.863 GHz - 6,391,574,666 instructions # 1.53 insn per cycle - 2.238987087 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1983) (512y: 92) (512z: 9360) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index e533cb8a65..dd054f4226 100644 --- 
a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +DATE: 2024-10-04_10:28:43 -DATE: 2024-10-02_22:31:14 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.333573e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.363743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365714e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.533533 sec -INFO: No Floating Point Exceptions have been reported - 2,265,915,416 cycles # 2.955 GHz - 3,527,237,824 instructions # 1.56 insn per cycle - 0.825201688 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.219819e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.274835e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274989e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 +TOTAL : 0.538450 sec +INFO: No Floating Point Exceptions have been reported + 1,578,538,660 cycles:u # 2.872 GHz (75.85%) + 2,510,341 stalled-cycles-frontend:u # 0.16% frontend cycles idle (76.14%) + 6,851,856 stalled-cycles-backend:u # 0.43% backend cycles idle (75.54%) + 2,042,648,852 instructions:u # 1.29 insn per cycle + # 0.00 stalled cycles per insn (75.16%) + 0.592143001 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.131054e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.161865e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.163156e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.042026 sec -INFO: No Floating Point Exceptions have been reported - 9,721,344,649 cycles # 2.947 GHz - 14,284,197,890 instructions # 1.47 insn per cycle - 3.359293537 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.812105e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.818586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.818703e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 +TOTAL : 6.030619 sec +INFO: No Floating Point Exceptions have been reported + 20,676,199,911 cycles:u # 3.416 GHz (74.93%) + 3,297,259 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) + 7,355,442 stalled-cycles-backend:u # 0.04% backend cycles idle (74.91%) + 18,492,665,928 instructions:u # 0.89 insn per cycle + # 0.00 stalled cycles per insn (75.11%) + 6.094301410 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.920229e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.921140e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921140e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.546949 sec -INFO: No Floating Point Exceptions have been reported - 25,998,282,864 cycles # 3.041 GHz - 79,450,746,897 instructions # 3.06 insn per cycle - 8.551213538 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.668544e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.669760e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.669760e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 6.153508 sec +INFO: No Floating Point Exceptions have been reported + 21,554,539,794 cycles:u # 3.501 GHz (74.99%) + 884,199 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.04%) + 2,816,905,461 stalled-cycles-backend:u # 13.07% backend cycles idle (74.99%) + 78,855,686,322 instructions:u # 3.66 insn per cycle + # 0.04 stalled cycles per insn (74.99%) + 6.161032029 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4763) (avx2: 0) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.656713e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.660030e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.660030e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.491295 sec -INFO: No Floating Point Exceptions have been reported - 12,816,709,585 cycles # 2.852 GHz - 38,780,987,144 instructions # 3.03 insn per cycle - 4.495553287 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.429343e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.434335e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.434335e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 3.028366 sec +INFO: No Floating Point Exceptions have been reported + 10,615,169,652 cycles:u # 3.502 GHz (74.93%) + 4,111,125 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.93%) + 1,389,155,199 stalled-cycles-backend:u # 13.09% backend cycles idle (74.93%) + 38,676,034,023 instructions:u # 3.64 insn per cycle + # 0.04 stalled cycles per insn (74.92%) + 3.036335947 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:11990) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.232154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.248832e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.248832e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.998281 sec -INFO: No Floating Point Exceptions have been reported - 5,587,815,925 cycles # 2.792 GHz - 13,730,785,401 instructions # 2.46 insn per cycle - 2.002499994 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.229287e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231865e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231865e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.342031 sec +INFO: No Floating Point Exceptions have been reported + 4,712,479,104 cycles:u # 3.504 GHz (74.90%) + 2,278,012 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) + 444,621,609 stalled-cycles-backend:u # 9.43% backend cycles idle (75.02%) + 13,604,129,685 instructions:u # 2.89 insn per cycle + # 0.03 stalled cycles per insn (75.02%) + 1.349775578 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10235) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 +Avg ME (F77/C++) = 6.6266730409276836E-004 +Relative difference = 2.9563428359824236e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.273072e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.294230e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.294230e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.774969 sec -INFO: No Floating Point Exceptions have been reported - 4,961,155,724 cycles # 2.790 GHz - 12,423,809,903 instructions # 2.50 insn per cycle - 1.779214057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.260898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.274229e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.274229e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.266066 sec -INFO: No Floating 
Point Exceptions have been reported - 4,182,312,406 cycles # 1.843 GHz - 6,495,020,499 instructions # 1.55 insn per cycle - 2.270352700 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1806) (512y: 190) (512z: 9358) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276857E-004 -Relative difference = 2.956342832710188e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 58a216130e..a754646936 100644 --- 
a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:33:06 +DATE: 2024-10-04_10:29:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.059066e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059482e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059641e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.448019 sec -INFO: No Floating Point Exceptions have been reported - 
8,346,552,119 cycles # 3.010 GHz - 17,505,316,851 instructions # 2.10 insn per cycle - 2.833264459 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.249682e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.251806e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.252033e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.012957 sec -INFO: No Floating Point Exceptions have been reported - 13,135,921,613 cycles # 3.025 GHz - 31,141,588,241 instructions # 2.37 insn per cycle - 4.400245474 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.899243e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.899462e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.899462e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.685169 sec -INFO: No Floating Point Exceptions have been reported - 18,964,432,627 cycles # 2.836 GHz - 53,903,774,133 instructions # 2.84 insn per cycle - 6.689349528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.197107e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197160e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.197160e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.431348 sec +INFO: No Floating Point Exceptions have been reported + 15,441,098,907 cycles:u # 3.496 GHz (75.01%) + 9,894,890 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) + 1,614,181,861 stalled-cycles-backend:u # 10.45% backend cycles idle (75.00%) + 53,530,475,903 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.00%) + 4.438636757 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.626145e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.626234e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.626234e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.249075 sec -INFO: No Floating Point Exceptions have been reported - 9,790,241,271 cycles # 3.010 GHz - 27,152,279,760 instructions # 2.77 insn per cycle - 3.253283773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.340376e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.340512e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.340512e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.258121 sec +INFO: No Floating Point Exceptions have been reported + 7,906,059,909 cycles:u # 3.497 GHz (74.88%) + 1,356,724 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.88%) + 767,966,259 stalled-cycles-backend:u # 9.71% backend cycles idle (74.90%) + 27,078,328,956 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 2.265122330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.533274e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.533700e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.533700e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.496587 sec -INFO: No Floating Point Exceptions have been reported - 4,263,425,533 cycles # 2.842 GHz - 9,591,372,936 instructions # 2.25 insn per cycle - 1.500755370 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.201588e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.202076e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.202076e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.017046 sec +INFO: No Floating Point Exceptions have been reported + 3,564,414,743 cycles:u # 3.495 GHz (74.97%) + 1,108,073 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.90%) + 310,098,984 stalled-cycles-backend:u # 8.70% backend cycles idle (74.90%) + 9,561,959,007 instructions:u # 2.68 insn per cycle + # 0.03 stalled cycles per insn (74.90%) + 1.024814784 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 3.966938e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.967470e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.967470e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.332801 sec -INFO: No Floating Point Exceptions have been reported - 3,736,922,615 cycles # 2.796 GHz - 8,515,084,014 instructions # 2.28 insn per cycle - 1.337097137 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.547498e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.548061e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.548061e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.490279 sec -INFO: No Floating Point Exceptions have been reported - 2,700,551,857 cycles # 1.808 GHz - 4,281,722,844 instructions # 1.59 insn per cycle - 1.494618048 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1615b7402d..1ca1764591 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_23:03:58 +DATE: 2024-10-04_11:16:19 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.055259e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.057350e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.057350e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.372375 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,116,434,360 cycles # 3.010 GHz - 18,416,481,934 instructions # 2.27 insn per cycle - 2.753979421 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.189805e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.222017e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.222017e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.994979 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,081,625,338 cycles # 3.026 GHz - 28,387,877,176 instructions # 2.17 insn per cycle - 4.377406416 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.186410e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.186644e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.186644e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.462059 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 19,169,468,026 cycles # 2.965 GHz - 53,903,983,718 instructions # 2.81 insn per cycle - 6.466524182 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.202290e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202327e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202327e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.394959 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 15,395,630,820 cycles:u # 3.501 GHz (74.90%) + 7,691,352 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.94%) + 1,587,810,411 stalled-cycles-backend:u # 10.31% backend cycles idle (75.03%) + 53,478,307,867 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.08%) + 4.402757479 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.623131e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.623222e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.623222e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.254596 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 9,880,100,026 cycles # 3.033 GHz - 27,153,310,266 instructions # 2.75 insn per cycle - 3.259041098 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.348643e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.348774e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.348774e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.250266 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,882,659,724 cycles:u # 3.498 GHz (74.82%) + 2,291,383 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.76%) + 810,603,743 stalled-cycles-backend:u # 10.28% backend cycles idle (74.94%) + 27,087,969,672 instructions:u # 3.44 insn per cycle + # 0.03 stalled cycles per insn (75.15%) + 2.258094292 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.505113e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505536e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505536e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.508139 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,272,653,512 cycles # 2.826 GHz - 9,594,202,047 instructions # 2.25 insn per cycle - 1.512512017 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.138759e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.139242e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.139242e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.029966 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,604,197,914 cycles:u # 3.489 GHz (74.55%) + 1,430,799 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.92%) + 303,526,117 stalled-cycles-backend:u # 8.42% backend cycles idle (75.22%) + 9,570,463,697 instructions:u # 2.66 insn per cycle + # 0.03 stalled cycles per insn (75.22%) + 1.037411100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.983827e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.984375e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.984375e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.327337 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,731,860,346 cycles # 2.803 GHz - 8,517,006,189 instructions # 2.28 insn per cycle - 1.331804367 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.634471e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.635161e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.635161e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.454363 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,703,496,141 cycles # 1.854 GHz - 4,284,293,846 instructions # 1.58 insn per cycle - 1.458845276 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 3a68912814..52d5d80fe7 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:34:32 +DATE: 2024-10-04_10:30:20 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.055952e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.056442e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.056602e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.449389 sec -INFO: No Floating Point Exceptions have been reported - 8,348,082,530 cycles # 3.004 GHz - 16,524,233,578 instructions # 1.98 insn per cycle - 2.837366535 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.258307e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.260215e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.260440e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.014474 sec -INFO: No Floating Point Exceptions have been reported - 13,153,845,841 cycles # 3.028 GHz - 31,087,113,730 instructions # 2.36 insn per cycle - 4.401303970 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722595284406640E-003 -Relative difference = 3.5164777671934515e-07 -OK (relative difference <= 5E-3) +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.940699e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.940944e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.940944e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.640802 sec -INFO: No Floating Point Exceptions have been reported - 18,841,020,722 cycles # 2.836 GHz - 53,933,535,215 instructions # 2.86 insn per cycle - 6.644982679 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.182021e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.182059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.182059e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.468565 sec +INFO: No Floating Point Exceptions have been reported + 15,634,721,907 cycles:u # 3.497 GHz (74.95%) + 5,085,110 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) + 1,647,500,835 stalled-cycles-backend:u # 10.54% backend cycles idle (74.95%) + 53,473,632,621 instructions:u # 3.42 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 4.476299042 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.601269e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.601355e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.601355e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.298877 sec -INFO: No Floating Point Exceptions have been reported - 9,967,394,924 cycles # 3.018 GHz - 27,130,116,099 instructions # 2.72 insn per cycle - 3.303134949 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.347167e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.347309e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.347309e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.251744 sec +INFO: No Floating Point Exceptions have been reported + 7,882,137,396 cycles:u # 3.496 GHz (74.81%) + 15,287,325 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.82%) + 758,625,123 stalled-cycles-backend:u # 9.62% backend cycles idle (74.97%) + 27,083,240,161 instructions:u # 3.44 insn per cycle + # 0.03 stalled cycles per insn (75.13%) + 2.258698525 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.524300e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.524716e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.524716e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.500483 sec -INFO: No Floating Point Exceptions have been reported - 4,288,401,155 cycles # 2.852 GHz - 9,585,756,274 instructions # 2.24 insn per cycle - 1.504684164 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.209504e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.209984e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.209984e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.015075 sec +INFO: No Floating Point Exceptions have been reported + 3,546,929,063 cycles:u # 3.484 GHz (74.87%) + 1,200,895 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.86%) + 274,080,517 stalled-cycles-backend:u # 7.73% backend cycles idle (74.86%) + 9,561,199,112 instructions:u # 2.70 insn per cycle + # 0.03 stalled cycles per insn (74.86%) + 1.023044859 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 +Avg ME (F77/C++) = 9.8722595285459444E-003 +Relative difference = 3.5163711246052657e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = 
( 4.003171e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.003722e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.003722e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.320958 sec -INFO: No Floating Point Exceptions have been reported - 3,744,622,204 cycles # 2.828 GHz - 8,508,595,657 instructions # 2.27 insn per cycle - 1.325042842 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 240) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.615962e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.616495e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.616495e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.462041 sec -INFO: No Floating Point Exceptions have been reported - 2,701,843,389 cycles # 1.843 GHz - 4,281,298,665 instructions # 1.58 insn per cycle - 1.466469773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2693) (512y: 184) (512z:79098) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285411531E-003 -Relative difference = 3.516375977906115e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index c5830d5029..08f0618e5c 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:35:58 +DATE: 2024-10-04_10:30:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.207882e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.208719e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.208944e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.755815 sec -INFO: No Floating Point Exceptions have been reported - 6,030,784,063 cycles # 2.986 GHz - 12,690,536,183 instructions # 2.10 insn per cycle - 2.076295584 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.154878e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155502e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.155595e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.055928 sec -INFO: No Floating Point Exceptions have been reported - 6,993,860,684 cycles # 3.012 GHz - 14,389,037,711 instructions # 2.06 insn per cycle - 2.378610677 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.791338e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.791603e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.791603e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.007057 sec -INFO: No Floating Point Exceptions have been reported - 18,246,753,562 cycles # 3.036 GHz - 53,910,639,040 instructions # 2.95 insn per cycle - 6.011238409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.079901e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.079923e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.079923e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.889350 sec +INFO: No Floating Point Exceptions have been reported + 17,110,757,262 cycles:u # 3.498 GHz (74.98%) + 101,242,552 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.98%) + 1,834,928,824 stalled-cycles-backend:u # 10.72% backend cycles idle (74.98%) + 54,147,547,381 instructions:u # 3.16 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 4.896725658 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.482340e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.482762e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.482762e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.518087 sec -INFO: No Floating Point Exceptions have been reported - 4,616,306,696 cycles # 3.034 GHz - 13,807,478,566 instructions # 2.99 insn per cycle - 1.522256201 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.719996e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.720382e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.720382e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.119334 sec +INFO: No Floating Point Exceptions have been reported + 3,914,853,183 cycles:u # 3.489 GHz (75.05%) + 50,519,645 stalled-cycles-frontend:u # 1.29% frontend cycles idle (75.05%) + 382,127,369 stalled-cycles-backend:u # 9.76% backend cycles idle (75.05%) + 13,751,093,710 instructions:u # 3.51 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 1.126666107 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 
4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.020421e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.022190e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.022190e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.754330 sec -INFO: No Floating Point Exceptions have been reported - 2,137,577,296 cycles # 2.820 GHz - 4,836,841,238 instructions # 2.26 insn per cycle - 0.758604558 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.014472e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.014627e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.014627e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.522139 sec +INFO: No Floating Point Exceptions have been reported + 1,831,942,479 cycles:u # 3.489 GHz (74.58%) + 15,822,916 stalled-cycles-frontend:u # 0.86% frontend cycles idle (74.15%) + 162,914,012 stalled-cycles-backend:u # 8.89% backend cycles idle (74.22%) + 4,832,252,888 instructions:u # 2.64 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 0.529423018 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.912780e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.914883e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.914883e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.669071 sec -INFO: No Floating Point Exceptions have been reported - 1,900,823,035 cycles # 2.826 GHz - 4,291,171,823 instructions # 2.26 insn per cycle - 0.673206807 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.288558e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.290700e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.290700e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.727738 sec -INFO: No Floating Point Exceptions have been reported - 1,355,809,114 cycles # 1.853 GHz - 2,162,656,295 instructions # 1.60 insn per cycle - 0.732221235 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 725d6753a9..5f9dc096d3 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,96 +19,35 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_23:05:24 +DATE: 2024-10-04_11:16:56 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.294446e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.299887e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.299887e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 -TOTAL : 1.676123 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,803,384,426 cycles # 2.997 GHz - 12,435,271,508 instructions # 2.14 insn per cycle - 1.992620080 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.134524e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.145734e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.145734e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 -TOTAL : 2.020497 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,855,684,842 cycles # 3.005 GHz - 14,918,783,289 instructions # 2.18 insn per cycle - 2.337019864 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260159E-003 -Relative difference = 0.0021940095370046923 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.807568e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.807845e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.807845e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 5.997723 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,158,608,631 cycles # 3.026 GHz - 53,912,576,507 instructions # 2.97 insn per cycle - 6.001895502 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.072401e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.072422e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.072422e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.923168 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,240,159,807 cycles:u # 3.500 GHz (74.96%) + 101,183,646 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.99%) + 1,888,003,325 stalled-cycles-backend:u # 10.95% backend cycles idle (74.99%) + 54,161,007,670 instructions:u # 3.14 insn per cycle + # 0.03 stalled cycles per insn (74.99%) + 4.930553656 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +55,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087551509E-003 -Relative difference = 2.119780432912131e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614203575E-003 +Relative difference = 3.4355542366580335e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.488685e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.489192e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489192e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.515485 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,602,710,939 cycles # 3.030 GHz - 13,809,381,685 instructions # 3.00 insn per cycle - 1.519902029 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.902451e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.902893e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.902893e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.079324 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,786,149,577 cycles:u # 3.498 GHz (74.73%) + 774,965 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.87%) + 367,116,707 stalled-cycles-backend:u # 9.70% backend cycles idle (74.87%) + 13,749,519,327 instructions:u # 3.63 insn per cycle + # 0.03 stalled cycles per insn (74.87%) + 1.086555722 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +92,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896367235E-003 -Relative difference = 3.1515505172940424e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.102201e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.103949e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.103949e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.745530 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,129,106,437 cycles # 2.842 GHz - 4,838,834,024 instructions # 2.27 insn per cycle - 0.749838678 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.042807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042969e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042969e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.509031 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,781,439,195 cycles:u # 3.480 GHz (75.03%) + 229,611 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) + 143,418,602 stalled-cycles-backend:u # 8.05% backend cycles idle (75.01%) 
+ 4,815,198,676 instructions:u # 2.70 insn per cycle + # 0.03 stalled cycles per insn (75.01%) + 0.516024912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,80 +129,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 
9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.914657e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.916750e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.916750e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.669155 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,905,705,276 cycles # 2.833 GHz - 4,293,242,906 instructions # 2.25 insn per cycle - 0.673440078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091246E-003 -Relative difference = 1.8588029579156084e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.205807e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.208130e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.208130e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.736316 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,359,100,452 cycles # 1.836 GHz - 2,164,753,539 instructions # 1.59 insn per cycle - 0.740818713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982676284E-003 -Relative difference = 2.004124217057488e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index d9277e9262..03c4dcf765 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:36:59 +DATE: 2024-10-04_10:31:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.201907e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.202602e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.202848e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.759755 sec -INFO: No Floating Point Exceptions have been reported - 6,041,131,533 cycles # 2.987 GHz - 12,887,925,845 instructions # 2.13 insn per cycle - 2.079278840 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.142501e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.143086e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.143184e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.062982 sec -INFO: No Floating Point Exceptions have been reported - 7,025,736,377 cycles # 3.016 GHz - 14,376,566,106 instructions # 2.05 insn per cycle - 2.386284867 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.849635e-03 -Avg ME (F77/GPU) = 9.8712451931260107E-003 -Relative difference = 0.0021940095370041636 -OK (relative difference <= 5E-3) -========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.806311e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.806570e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.806570e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.000091 sec -INFO: No Floating Point Exceptions have been reported - 18,259,581,889 cycles # 3.042 GHz - 53,898,592,963 instructions # 2.95 insn per cycle - 6.004360411 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.080560e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.080582e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.080582e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 +TOTAL : 4.886139 sec +INFO: No Floating Point Exceptions have been reported + 17,112,350,866 cycles:u # 3.500 GHz (74.97%) + 102,359,219 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.97%) + 1,775,063,311 stalled-cycles-backend:u # 10.37% backend cycles idle (74.97%) + 54,141,024,086 instructions:u # 3.16 insn per cycle + # 0.03 stalled cycles per insn (74.98%) + 4.894054989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847961e-03 -Avg ME (F77/C++) = 9.8479612087572898E-003 -Relative difference = 2.1198021522715588e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855168e-03 +Avg ME (F77/C++) = 9.8551676614199186E-003 +Relative difference = 3.435558690007174e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.506868e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.507352e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.507352e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.507769 sec -INFO: No Floating Point Exceptions have been reported - 4,592,889,606 cycles # 3.040 GHz - 13,800,588,544 instructions # 3.00 insn per cycle - 1.511992304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.921510e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.921954e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.921954e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 +TOTAL : 1.074522 sec +INFO: No Floating Point Exceptions have been reported + 3,759,358,418 cycles:u # 3.489 GHz (74.76%) + 649,246 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.76%) + 360,850,463 stalled-cycles-backend:u # 9.60% backend cycles idle (74.69%) + 13,770,148,457 instructions:u # 3.66 insn per cycle + # 0.03 stalled cycles per insn (75.06%) + 1.082360075 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.847955e-03 -Avg ME (F77/C++) = 9.8479546896065809E-003 -Relative difference = 3.151856596628469e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.855164e-03 +Avg ME (F77/C++) = 9.8551639361110794E-003 +Relative difference = 6.48278610035626e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.927112e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.928805e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.928805e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.764116 sec -INFO: No Floating Point Exceptions have been reported - 2,152,921,246 cycles # 2.805 GHz - 4,840,961,497 instructions # 2.25 insn per cycle - 0.768293313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.040829e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.040996e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.040996e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 +TOTAL : 0.509322 sec +INFO: No Floating Point Exceptions have been reported + 1,787,072,903 cycles:u # 3.486 GHz (75.12%) + 431,670 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) + 140,499,187 stalled-cycles-backend:u # 7.86% backend cycles idle (75.04%) + 4,812,515,332 instructions:u # 2.69 insn per cycle + # 0.03 stalled cycles per insn (75.04%) + 0.516905560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.836478e-03 +Avg ME (F77/C++) = 9.8364784946823516E-003 +Relative difference = 5.0290597139820844e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.901326e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.903485e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.903485e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.669713 sec -INFO: No Floating Point Exceptions have been reported - 1,899,776,233 cycles # 2.822 GHz - 4,295,171,210 instructions # 2.26 insn per cycle - 0.673880897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 25) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892973e-03 -Avg ME (F77/C++) = 9.8929728161091923E-003 -Relative difference = 1.85880227405429e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.249891e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.252145e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.252145e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.730611 sec -INFO: No Floating Point Exceptions have been reported - 1,361,058,670 cycles # 1.854 GHz - 2,169,526,438 instructions # 1.59 insn per cycle - 0.734943392 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.892981e-03 -Avg ME (F77/C++) = 9.8929811982957326E-003 -Relative difference = 2.0044082998332894e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 9d0b73e163..116046dfb8 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:38:01 +DATE: 2024-10-04_10:32:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.666751e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.667250e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.667415e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.193907 sec -INFO: No Floating Point Exceptions have been reported - 7,630,208,470 cycles # 3.025 GHz - 15,813,975,042 instructions # 2.07 insn per cycle - 2.578598510 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108221e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.108518e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.108553e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.432158 sec -INFO: No Floating Point Exceptions have been reported - 11,402,912,009 cycles # 3.032 GHz - 24,689,535,297 instructions # 2.17 insn per cycle - 3.818442336 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.867089e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.867297e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.867297e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.713479 sec -INFO: No Floating Point Exceptions have been reported - 19,196,861,628 cycles # 2.858 GHz - 54,133,636,915 instructions # 2.82 insn per cycle - 6.717705413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.203416e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.203454e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.203454e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.390036 sec +INFO: No Floating Point Exceptions have been reported + 15,382,779,589 cycles:u # 3.502 GHz (74.94%) + 2,323,654 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) + 1,701,905,344 stalled-cycles-backend:u # 11.06% backend cycles idle (75.05%) + 53,720,490,538 instructions:u # 3.49 insn per cycle + # 0.03 stalled cycles per insn (75.05%) + 4.396995633 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.575052e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.575140e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.575140e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.353105 sec -INFO: No Floating Point Exceptions have been reported - 9,514,230,425 cycles # 2.835 GHz - 26,187,858,352 instructions # 2.75 insn per cycle - 3.357249981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.492350e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.492497e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.492497e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.120384 sec +INFO: No Floating Point Exceptions have been reported + 7,427,822,621 cycles:u # 3.498 GHz (74.84%) + 2,036,263 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.03%) + 811,533,037 stalled-cycles-backend:u # 10.93% backend cycles idle (75.13%) + 25,862,271,774 instructions:u # 3.48 insn per cycle + # 0.03 stalled cycles per insn (75.13%) + 2.144395965 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.700128e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.700595e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.700595e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.429975 sec -INFO: No Floating Point Exceptions have been reported - 4,074,429,263 cycles # 2.842 GHz - 9,249,195,343 instructions # 2.27 insn per cycle - 1.434239548 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.284060e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.284564e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.284564e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 1.000498 sec +INFO: No Floating Point Exceptions have been reported + 3,492,343,263 cycles:u # 3.481 GHz (74.68%) + 49,955,347 stalled-cycles-frontend:u # 1.43% frontend cycles idle (75.08%) + 306,069,910 stalled-cycles-backend:u # 8.76% backend cycles idle (75.29%) + 9,109,427,934 instructions:u # 2.61 insn per cycle + # 0.03 stalled cycles per insn (75.29%) + 1.007321016 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.266422e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.267083e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.267083e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.240358 sec -INFO: No Floating Point Exceptions have been reported - 3,512,291,376 cycles # 2.824 GHz - 8,183,196,831 instructions # 2.33 insn per cycle - 1.244579165 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 80) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.600907e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.601474e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.601474e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.469084 sec -INFO: No Floating Point Exceptions have been reported - 2,662,106,284 cycles # 1.808 GHz - 4,173,178,161 instructions # 1.57 insn per cycle - 1.473471448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 92) (512z:78910) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 559bd31d07..5982c7fe15 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 
for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,80 +19,33 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. - -DATE: 2024-10-02_22:39:25 +DATE: 2024-10-04_10:32:37 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.671708e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.672224e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.672401e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.196836 sec -INFO: No Floating Point Exceptions have been reported - 7,586,412,190 cycles # 3.005 GHz - 16,831,088,475 instructions # 2.22 insn per cycle - 2.584515718 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% -......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.106090e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.106386e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.106418e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.438799 sec -INFO: No Floating Point Exceptions have been reported - 11,376,125,932 cycles # 3.016 GHz - 26,554,562,579 instructions # 2.33 insn per cycle - 3.828018149 seconds time elapsed -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 9.872263e-03 -Avg ME (F77/GPU) = 9.8722599015656498E-003 -Relative difference = 3.1385249252060663e-07 -OK (relative difference <= 5E-3) +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.838588e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.838795e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.838795e+01 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.729469 sec -INFO: No Floating Point Exceptions have been reported - 19,118,150,644 cycles # 2.840 GHz - 54,162,338,740 instructions # 2.83 insn per cycle - 6.733611093 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.175474e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175513e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175513e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 4.492696 sec +INFO: No Floating Point Exceptions have been reported + 15,677,051,375 cycles:u # 3.488 GHz (74.88%) + 7,593,419 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.91%) + 1,678,941,636 stalled-cycles-backend:u # 10.71% backend cycles idle (74.95%) + 53,738,210,249 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (75.04%) + 4.499980758 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +53,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.612496e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.612591e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.612591e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.276928 sec -INFO: No Floating Point Exceptions have been reported - 9,293,469,250 cycles # 2.833 GHz - 26,089,245,195 instructions # 2.81 insn per cycle - 3.281183397 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.497111e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.497256e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.497256e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 2.116118 sec +INFO: No Floating Point Exceptions have been reported + 7,419,969,367 cycles:u # 3.502 GHz (74.75%) + 1,956,530 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) + 790,000,842 stalled-cycles-backend:u # 10.65% backend cycles idle (75.08%) + 25,753,798,107 instructions:u # 3.47 insn per cycle + # 0.03 stalled cycles per insn (75.08%) + 2.137334693 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +88,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.692288e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.692744e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.692744e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.434426 sec -INFO: No Floating Point Exceptions have been reported - 4,061,133,652 cycles # 2.824 GHz - 9,213,647,458 instructions # 2.27 insn per cycle - 1.438661249 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.582380e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.582941e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.582941e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 +TOTAL : 0.947983 sec +INFO: No Floating Point Exceptions have been reported + 3,318,902,094 cycles:u # 3.490 GHz (74.78%) + 491,341 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.76%) + 258,519,019 stalled-cycles-backend:u # 7.79% backend cycles idle (74.76%) + 9,040,296,434 instructions:u # 2.72 insn per cycle + # 0.03 stalled cycles per insn (74.78%) + 0.955766528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,76 +123,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.284969e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.285585e+02 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.285585e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.235575 sec -INFO: No Floating Point Exceptions have been reported - 3,509,658,458 cycles # 2.833 GHz - 8,168,658,311 instructions # 2.33 insn per cycle - 1.239748090 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 230) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.726305e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.726893e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.726893e+02 ) sec^-1 -MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.419482 sec -INFO: No Floating Point Exceptions have been reported - 2,625,028,267 cycles # 1.845 GHz - 4,167,468,567 instructions # 1.59 insn per cycle - 1.423823222 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1879) (512y: 174) (512z:78884) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722594324461913E-003 -Relative difference = 3.613714310412983e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 37f0f4c146..f66367ad66 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:05 -DATE: 2024-10-02_22:31:48 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.834826e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.929186e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.043914e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.458579 sec -INFO: No Floating Point Exceptions have been reported - 1,990,123,139 cycles # 2.953 GHz - 2,784,480,859 instructions # 1.40 insn per cycle - 0.733197576 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.080649e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.567361e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.576990e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 +TOTAL : 0.364406 sec +INFO: No Floating Point Exceptions have been reported + 962,974,955 cycles:u # 2.641 GHz (75.14%) + 2,496,617 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.03%) + 5,095,285 stalled-cycles-backend:u # 0.53% backend cycles idle (76.14%) + 1,449,498,115 instructions:u # 1.51 insn per cycle + # 0.00 stalled cycles per insn (77.08%) + 0.416440309 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.981412e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.496464e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.730696e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 
2.116328e+02 ) GeV^-2 -TOTAL : 0.543487 sec -INFO: No Floating Point Exceptions have been reported - 2,322,895,437 cycles # 2.968 GHz - 3,227,685,027 instructions # 1.39 insn per cycle - 0.842253747 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.957014e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.678838e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.694069e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 +TOTAL : 0.489555 sec +INFO: No Floating Point Exceptions have been reported + 1,280,261,518 cycles:u # 2.506 GHz (76.89%) + 2,414,688 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.74%) + 7,024,109 stalled-cycles-backend:u # 0.55% backend cycles idle (74.98%) + 1,750,089,062 instructions:u # 1.37 insn per cycle + # 0.00 stalled cycles per insn (74.51%) + 0.548004651 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (F77/GPU) = 0.14247482467490469 +Relative difference = 5.286902836925003e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.098188e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.121629e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.121629e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.511028 sec -INFO: No Floating Point Exceptions have been reported - 4,619,987,849 cycles # 3.050 GHz - 13,190,822,149 instructions # 2.86 insn per cycle - 1.515227589 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.449138e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.478107e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.478107e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.154326 sec +INFO: No Floating Point Exceptions have been reported + 4,027,250,976 cycles:u # 3.480 GHz (75.12%) + 2,661,759 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.12%) + 809,783,805 stalled-cycles-backend:u # 20.11% backend cycles idle (75.12%) + 13,130,611,823 instructions:u # 3.26 insn per cycle + # 0.06 stalled cycles per insn (75.12%) + 1.161554843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.922055e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.994654e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.994654e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.871134 sec -INFO: No Floating Point Exceptions have been reported - 2,634,578,151 cycles # 3.012 GHz - 7,554,878,218 instructions # 2.87 insn per cycle - 0.875291158 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.509870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.596568e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.596568e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.676181 sec +INFO: No Floating Point Exceptions have been reported + 2,361,369,338 cycles:u # 3.477 GHz (75.00%) + 2,082,729 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.27%) + 645,346,736 stalled-cycles-backend:u # 27.33% backend cycles idle (75.27%) + 7,468,617,395 instructions:u # 3.16 insn per cycle + # 0.09 stalled cycles per insn (75.27%) + 0.683368779 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3010) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.211416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.420508e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.420508e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.529658 sec -INFO: No Floating Point Exceptions have been reported - 1,488,293,928 cycles # 2.791 GHz - 3,159,946,212 instructions # 2.12 insn per cycle - 0.533835521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.772164e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.100284e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.100284e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.366910 sec +INFO: No Floating Point Exceptions have been reported + 1,284,395,524 cycles:u # 3.472 GHz (74.43%) + 1,919,279 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.06%) + 224,523,956 stalled-cycles-backend:u # 17.48% backend cycles idle (74.06%) + 3,088,983,186 instructions:u # 2.41 insn per cycle + # 0.07 stalled cycles per insn (74.40%) + 0.374787504 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.512087e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.763823e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.763823e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.486021 sec -INFO: No Floating Point Exceptions have been reported - 1,346,900,449 cycles # 2.750 GHz - 3,013,892,972 instructions # 2.24 insn per cycle - 0.490326977 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.472318e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.592196e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.592196e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.683635 sec -INFO: No Floating Point Exceptions have been reported - 1,324,488,225 cycles # 1.928 GHz - 1,962,344,375 instructions # 1.48 insn per cycle - 0.687834799 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index edac9efaa0..c1bb71aaa3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_11:15:24 -DATE: 2024-10-02_23:02:33 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.357617e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.567301e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.567301e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.480710 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,054,637,495 cycles # 2.959 GHz - 3,064,097,821 instructions # 1.49 insn per cycle - 0.751345984 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 2.208003e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.457307e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.457307e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.511863 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,531,477,497 cycles:u # 2.906 GHz (74.22%) + 6,663,983 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.53%) + 271,116,364 stalled-cycles-backend:u # 17.70% backend cycles idle (74.50%) + 1,914,127,148 instructions:u # 1.25 insn per cycle + # 0.14 stalled cycles per insn (74.91%) + 0.561318051 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.284276e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.260264e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.260264e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.756366 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,992,488,404 cycles # 2.973 GHz - 4,533,320,753 instructions # 1.51 insn per cycle - 1.065306552 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.016150e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.168560e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.168560e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.217284e+03 +- 8.156969e+02 ) GeV^-2 +TOTAL : 1.118560 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,337,834,991 cycles:u # 2.897 GHz (74.89%) + 16,778,113 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.69%) + 838,698,020 stalled-cycles-backend:u # 25.13% backend cycles idle (75.00%) + 3,491,444,280 instructions:u # 1.05 insn per cycle + # 0.24 stalled cycles per insn (75.00%) + 1.192167863 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (F77/GPU) = 0.14247482467490469 +Relative difference = 5.286902836925003e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.096875e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120294e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120294e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.518699 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,650,030,796 cycles # 3.055 GHz - 13,198,473,845 instructions # 2.84 insn per cycle - 1.523176274 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.409731e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.436914e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.436914e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.190081 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,165,664,768 cycles:u # 3.490 GHz (74.60%) + 1,999,089 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.91%) + 936,146,357 stalled-cycles-backend:u # 22.47% backend cycles idle (75.19%) + 13,139,188,653 instructions:u # 3.15 insn per cycle + # 0.07 stalled cycles per insn (75.20%) + 1.197508250 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.939375e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011645e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011645e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.870214 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,669,019,724 cycles # 3.054 GHz - 7,604,492,901 instructions # 2.85 insn per cycle - 0.874664100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.509767e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.596497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.596497e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.680631 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,380,899,151 cycles:u # 3.481 GHz (74.36%) + 2,045,113 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.37%) + 645,751,963 stalled-cycles-backend:u # 27.12% backend cycles idle (74.92%) + 7,502,034,938 instructions:u # 3.15 insn per cycle + # 0.09 stalled cycles per insn (75.41%) + 0.687987997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3010) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.240225e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.449199e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.449199e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.531313 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,520,382,878 cycles # 2.841 GHz - 3,208,340,410 instructions # 2.11 insn per cycle - 0.535666139 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.752340e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.077854e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.077854e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.372240 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,294,679,735 cycles:u # 3.448 GHz (74.46%) + 2,052,234 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.45%) + 224,523,524 stalled-cycles-backend:u # 17.34% backend cycles idle (74.45%) + 3,103,979,727 instructions:u # 2.40 insn per cycle + # 0.07 stalled cycles per insn (74.56%) + 0.379789000 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.608215e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.869332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.869332e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.480406 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,381,392,428 cycles # 2.852 GHz - 3,064,436,632 instructions # 2.22 insn per cycle - 0.484872552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.420993e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.538745e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.538745e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.705713 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,370,817,527 cycles # 1.932 GHz - 2,002,052,233 instructions # 1.46 insn per cycle - 0.710306404 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index f87fba715e..862764ef6e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:12 -DATE: 2024-10-02_22:32:01 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.806684e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.878937e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.003620e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.457908 sec -INFO: No Floating Point Exceptions have been reported - 1,992,366,483 cycles # 2.953 GHz - 2,806,396,880 instructions # 1.41 insn per cycle - 0.732986277 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.150743e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.704934e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.715045e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 +TOTAL : 0.351090 sec +INFO: No Floating Point Exceptions have been reported + 927,250,962 cycles:u # 2.542 GHz (74.61%) + 2,564,965 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.90%) + 4,834,497 stalled-cycles-backend:u # 0.52% backend cycles idle (72.46%) + 1,462,832,615 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (75.46%) + 0.406691727 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.961222e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.420833e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.640275e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.537970 sec -INFO: No Floating Point Exceptions have been reported - 2,313,496,127 cycles # 2.973 GHz - 3,286,265,008 instructions # 
1.42 insn per cycle - 0.835500868 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.160706e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.014223e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.031032e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 +TOTAL : 0.503853 sec +INFO: No Floating Point Exceptions have been reported + 1,271,560,375 cycles:u # 2.529 GHz (74.93%) + 2,393,764 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.82%) + 9,992,356 stalled-cycles-backend:u # 0.79% backend cycles idle (75.80%) + 1,777,974,435 instructions:u # 1.40 insn per cycle + # 0.01 stalled cycles per insn (76.74%) + 0.563738720 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490466 -Relative difference = 5.286902838873106e-07 +Avg ME (F77/GPU) = 0.14247482467490469 +Relative difference = 5.286902836925003e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.095939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.118909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.118909e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.513926 sec -INFO: No Floating Point Exceptions have been reported - 4,617,878,876 cycles # 3.044 GHz - 13,179,768,298 instructions # 2.85 insn per cycle - 1.518148487 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.438309e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.466502e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.466502e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.162705 sec +INFO: No Floating Point Exceptions have been reported + 4,063,566,371 cycles:u # 3.486 GHz (74.61%) + 2,470,567 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.80%) + 751,456,331 stalled-cycles-backend:u # 18.49% backend cycles idle (75.15%) + 13,131,258,870 instructions:u # 3.23 insn per cycle + # 0.06 stalled cycles per insn (75.30%) + 1.170023945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.958372e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033582e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033582e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.854860 sec -INFO: No Floating Point Exceptions have been reported - 2,637,650,061 cycles # 3.073 GHz - 7,552,993,704 instructions # 2.86 insn per cycle - 0.859000708 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.469436e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.553907e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.553907e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.687246 sec +INFO: No Floating Point Exceptions have been reported + 2,401,871,924 cycles:u # 3.480 GHz (74.52%) + 1,948,566 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.51%) + 617,453,605 stalled-cycles-backend:u # 25.71% backend cycles idle (74.43%) + 7,491,115,990 instructions:u # 3.12 insn per cycle + # 0.08 stalled cycles per insn (75.01%) + 0.694299928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3003) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.291817e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.503784e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.503784e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.516316 sec -INFO: No Floating Point Exceptions have been reported - 1,490,683,274 cycles # 2.867 GHz - 3,158,884,365 instructions # 2.12 insn per cycle - 0.520526770 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.735717e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.057899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.057899e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.369508 sec +INFO: No Floating Point Exceptions have been reported + 1,286,022,326 cycles:u # 3.453 GHz (74.27%) + 1,818,073 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.24%) + 305,110,657 stalled-cycles-backend:u # 23.73% backend cycles idle (74.24%) + 3,083,688,111 instructions:u # 2.40 insn per cycle + # 0.10 stalled cycles per insn (74.60%) + 0.376414098 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2873) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.689767e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.957818e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.957818e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.462577 sec -INFO: No Floating Point Exceptions have been reported - 1,342,018,810 cycles # 2.879 GHz - 3,010,796,760 instructions # 2.24 insn per cycle - 0.466768744 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.497346e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.619356e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.619356e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.676874 sec -INFO: No Floating Point Exceptions have been reported - 1,324,736,218 cycles # 1.948 GHz - 1,960,830,009 instructions # 1.48 insn per cycle - 0.681118880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482467492589 -Relative difference = 5.286901348574438e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index ea31adf683..f61a80ed95 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:18 -DATE: 2024-10-02_22:32:15 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.702651e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.950700e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.099951e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.451239 sec -INFO: No Floating Point Exceptions have been reported - 1,977,484,525 cycles # 2.954 GHz - 2,783,351,249 instructions # 1.41 insn per cycle - 0.726735040 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.377727e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.319503e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328467e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.205132e+03 +- 5.720277e+03 ) GeV^-2 +TOTAL : 0.320572 sec +INFO: No Floating Point Exceptions have been reported + 803,651,652 cycles:u # 2.440 GHz (75.78%) + 2,488,663 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.23%) + 5,317,157 stalled-cycles-backend:u # 0.66% backend cycles idle (75.05%) + 1,355,208,615 instructions:u # 1.69 insn per cycle + # 0.00 stalled cycles per insn (74.27%) + 0.372732518 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.338269e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.447507e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.811164e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.491472 sec -INFO: No Floating Point Exceptions have been reported - 2,126,978,214 cycles # 2.918 GHz - 2,967,166,452 instructions # 1.40 
insn per cycle - 0.787773473 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.816019e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.474846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.485746e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.171486e+04 +- 7.161170e+04 ) GeV^-2 +TOTAL : 0.422699 sec +INFO: No Floating Point Exceptions have been reported + 1,029,982,976 cycles:u # 2.442 GHz (74.04%) + 2,458,628 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.67%) + 5,130,049 stalled-cycles-backend:u # 0.50% backend cycles idle (75.81%) + 1,524,387,376 instructions:u # 1.48 insn per cycle + # 0.00 stalled cycles per insn (75.35%) + 0.481909964 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.424322e-01 +Avg ME (F77/GPU) = 0.14247950478971561 +Relative difference = 0.0003321214564936614 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.154245e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.180927e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.180927e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.437058 sec -INFO: No Floating Point Exceptions have been reported - 4,402,948,339 cycles # 3.057 GHz - 12,951,871,317 instructions # 2.94 insn per cycle - 1.441082878 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.650625e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.689429e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.689429e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.014202 sec +INFO: No Floating Point Exceptions have been reported + 3,536,784,161 cycles:u # 3.478 GHz (74.89%) + 1,844,458 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.83%) + 400,984,416 stalled-cycles-backend:u # 11.34% backend cycles idle (74.83%) + 12,888,814,241 instructions:u # 3.64 insn per cycle + # 0.03 stalled cycles per insn (74.83%) + 1.021297393 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.851169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.029409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.029409e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.592102 sec -INFO: No Floating Point Exceptions have been reported - 1,729,947,177 cycles # 2.905 GHz - 4,542,920,425 instructions # 2.63 insn per cycle - 0.596239608 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.250998e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.520136e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.520136e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.406891 sec +INFO: No Floating Point Exceptions have been reported + 1,423,239,046 cycles:u # 3.474 GHz (74.79%) + 1,718,996 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.63%) + 484,659,717 stalled-cycles-backend:u # 34.05% backend cycles idle (74.63%) + 4,303,460,822 instructions:u # 3.02 insn per cycle + # 0.11 stalled cycles per insn (74.63%) + 0.413887732 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.840593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.576208e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.576208e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.297766 sec -INFO: No Floating Point Exceptions have been reported - 857,398,073 cycles # 2.846 GHz - 1,917,934,137 instructions # 2.24 insn per cycle - 0.301767368 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.931441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.913616e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.913616e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.228275 sec +INFO: No Floating Point Exceptions have been reported + 794,261,276 cycles:u # 3.436 GHz (73.52%) + 1,831,772 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.20%) + 225,154,218 stalled-cycles-backend:u # 28.35% backend cycles idle (75.79%) + 1,861,340,575 instructions:u # 2.34 insn per cycle + # 0.12 stalled cycles per insn (75.79%) + 0.235258451 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.022252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.815506e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
6.815506e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.289819 sec -INFO: No Floating Point Exceptions have been reported - 805,893,210 cycles # 2.747 GHz - 1,834,128,170 instructions # 2.28 insn per cycle - 0.293996379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.730274e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.196749e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.196749e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.366135 sec -INFO: No Floating Point Exceptions have been reported - 730,443,209 cycles # 1.976 GHz - 1,308,748,067 instructions # 1.79 insn per cycle - 0.370229298 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 171a938e2f..8a463e21a7 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,97 +1,77 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_11:15:31 -DATE: 2024-10-02_23:02:46 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.066919e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.361842e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.361842e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.460364 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,990,366,635 cycles # 2.956 GHz - 2,905,841,235 instructions # 1.46 insn per cycle - 0.730162203 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge -WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost -WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 4.020725e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.186535e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.186535e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.955602e+02 +- 1.188241e+02 ) GeV^-2 +TOTAL : 0.478907 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,420,125,800 cycles:u # 2.887 GHz (75.11%) + 11,211,403 stalled-cycles-frontend:u # 0.79% frontend cycles idle (75.10%) + 262,078,503 stalled-cycles-backend:u # 18.45% backend cycles idle (74.86%) + 1,896,380,021 instructions:u # 1.34 insn per cycle + # 0.14 stalled cycles per insn (73.88%) + 0.531984168 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.138480e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.921745e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.921745e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.626871 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,570,592,828 cycles # 2.938 GHz - 3,830,625,555 instructions # 1.49 insn per cycle - 0.931187767 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.747782e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.141361e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.141361e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.184227e+03 +- 7.941570e+02 ) GeV^-2 +TOTAL : 1.009049 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,053,459,163 cycles:u # 2.959 GHz (75.31%) + 29,478,807 stalled-cycles-frontend:u # 0.97% frontend cycles idle (75.66%) + 840,754,925 stalled-cycles-backend:u # 27.53% backend cycles idle (74.67%) + 3,346,412,800 instructions:u # 1.10 insn per cycle + # 0.25 stalled cycles per insn (74.67%) + 1.071619763 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -99,35 +79,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.424322e-01 +Avg ME (F77/GPU) = 0.14247950478971561 +Relative difference = 0.0003321214564936614 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.145066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.171268e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.171268e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.451272 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,420,042,371 cycles # 3.039 GHz - 12,957,560,789 instructions # 2.93 insn per cycle - 1.455401506 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.652830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.691788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.691788e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.014850 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,544,674,257 cycles:u # 3.482 GHz (74.87%) + 1,715,233 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.86%) + 399,646,581 stalled-cycles-backend:u # 11.27% backend cycles idle (74.86%) + 12,880,885,169 instructions:u # 3.63 insn per cycle + # 0.03 stalled cycles per insn (74.86%) + 1.021880296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +116,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.984297e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.170633e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.170633e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.570146 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,748,150,599 cycles # 3.047 GHz - 4,590,399,718 instructions # 2.63 insn per cycle - 0.574229373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.122119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.374377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.374377e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.421621 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,455,852,865 cycles:u # 3.429 GHz (75.45%) + 1,801,388 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.52%) + 518,485,359 stalled-cycles-backend:u # 35.61% backend cycles idle (75.52%) + 4,311,204,773 instructions:u # 2.96 insn per cycle + # 0.12 stalled cycles per insn (75.52%) + 0.429595072 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +153,36 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.872273e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.592788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.592788e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.300259 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 875,448,713 cycles # 2.882 GHz - 1,954,867,221 instructions # 2.23 insn per cycle - 0.304452268 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.898517e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.853816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.853816e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.231864 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 810,604,946 cycles:u # 3.450 GHz (72.09%) + 1,903,581 stalled-cycles-frontend:u # 0.23% frontend cycles idle (73.94%) + 222,245,004 stalled-cycles-backend:u # 27.42% backend cycles idle (75.61%) + 
1,888,821,187 instructions:u # 2.33 insn per cycle + # 0.12 stalled cycles per insn (76.18%) + 0.238946046 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,80 +190,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.281096e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.128992e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.128992e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.282309 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 821,270,186 cycles # 2.872 GHz - 1,871,027,279 instructions # 2.28 insn per cycle - 0.286525778 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -WARNING! 
Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.718318e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.194314e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.194314e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.370922 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 748,872,143 cycles # 2.000 GHz - 1,350,116,546 instructions # 1.80 insn per cycle - 0.375129376 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 2256daf6c3..5af0f6ea0a 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:24 -DATE: 2024-10-02_22:32:27 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.702298e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.990170e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.136648e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.449421 sec -INFO: No Floating Point Exceptions have been reported - 1,950,583,088 cycles # 2.925 GHz - 2,701,544,767 instructions # 1.38 insn per cycle - 0.724364608 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 169 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 8.082066e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215210e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.223122e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.205132e+03 +- 5.720277e+03 ) GeV^-2 +TOTAL : 0.317799 sec +INFO: No Floating Point Exceptions have been reported + 844,589,664 cycles:u # 2.584 GHz (73.78%) + 2,509,111 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.31%) + 12,245,889 stalled-cycles-backend:u # 1.45% backend cycles idle (74.70%) + 1,354,529,485 instructions:u # 1.60 insn per cycle + # 0.01 stalled cycles per insn (74.50%) + 0.371487604 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.344116e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.482358e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.864758e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.487785 sec -INFO: No Floating Point Exceptions have been reported - 2,122,439,624 cycles # 2.960 GHz - 3,010,905,785 instructions # 
1.42 insn per cycle - 0.774447089 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.705812e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.228243e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.237115e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.171486e+04 +- 7.161170e+04 ) GeV^-2 +TOTAL : 0.403616 sec +INFO: No Floating Point Exceptions have been reported + 1,063,510,607 cycles:u # 2.535 GHz (75.56%) + 2,307,789 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.82%) + 8,084,554 stalled-cycles-backend:u # 0.76% backend cycles idle (74.87%) + 1,664,889,093 instructions:u # 1.57 insn per cycle + # 0.00 stalled cycles per insn (72.80%) + 0.462869624 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 1.424226e-01 -Avg ME (F77/GPU) = 0.14247487904286338 -Relative difference = 0.0003670698531228044 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 1.424322e-01 +Avg ME (F77/GPU) = 0.14247950479185079 +Relative difference = 0.00033212147148451967 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.149657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175819e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175819e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.442333 sec -INFO: No Floating Point Exceptions have been reported - 4,403,161,402 cycles # 3.046 GHz - 12,927,638,091 instructions # 2.94 insn per cycle - 1.446362002 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.637062e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.675221e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.675221e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 +TOTAL : 1.022211 sec +INFO: No Floating Point Exceptions have been reported + 3,589,849,070 cycles:u # 3.502 GHz (74.85%) + 1,729,282 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) + 525,569,379 stalled-cycles-backend:u # 14.64% backend cycles idle (75.03%) + 12,871,759,204 instructions:u # 3.59 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 1.029456479 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 718) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246861273719524 -Relative difference = 8.940352641194861e-08 +Avg ME (F77/C++) = 0.14246858320096933 +Relative difference = 1.1791391693704193e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.989413e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.176290e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.176290e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.564589 sec -INFO: No Floating Point Exceptions have been reported - 1,725,063,093 cycles # 3.036 GHz - 4,536,592,580 instructions # 2.63 insn per cycle - 0.568805063 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.193624e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.454281e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.454281e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 +TOTAL : 0.411966 sec +INFO: No Floating Point Exceptions have been reported + 1,442,483,013 cycles:u # 3.478 GHz (75.10%) + 1,744,540 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.94%) + 489,961,029 stalled-cycles-backend:u # 33.97% backend cycles idle (74.94%) + 4,296,198,664 instructions:u # 2.98 insn per cycle + # 0.11 stalled cycles per insn (74.94%) + 0.419091231 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3379) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246862329122401 -Relative difference = 1.6348320966878032e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424687e-01 +Avg ME (F77/C++) = 0.14246865423667998 +Relative difference = 3.2121666037785094e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.871312e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.604631e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.604631e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.296052 sec -INFO: No Floating Point Exceptions have been reported - 857,546,580 cycles # 2.863 GHz - 1,914,366,165 instructions # 2.23 insn per cycle - 0.300067432 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3549) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.968545e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.937029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.937029e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 +TOTAL : 0.226723 sec +INFO: No Floating Point Exceptions have been reported + 778,463,869 cycles:u # 3.391 GHz (75.62%) + 1,805,704 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.62%) + 241,725,364 stalled-cycles-backend:u # 31.05% backend cycles idle (75.62%) + 1,852,884,590 instructions:u # 2.38 insn per cycle + # 0.13 stalled cycles per insn (75.62%) + 0.233668425 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3463) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 +Avg ME (F77/C++) = 0.14247490118064832 +Relative difference = 8.286711056488833e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.287189e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.128303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
7.128303e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.277637 sec -INFO: No Floating Point Exceptions have been reported - 802,533,820 cycles # 2.856 GHz - 1,829,848,597 instructions # 2.28 insn per cycle - 0.281575570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 22) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491543012991 -Relative difference = 1.0830068962165901e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.755061e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.233949e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.233949e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.364070 sec -INFO: No Floating Point Exceptions have been reported - 730,229,495 cycles # 1.987 GHz - 1,306,200,417 instructions # 1.79 insn per cycle - 0.368140152 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1928) (512y: 24) (512z: 2435) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247491576758442 -Relative difference = 1.1066920862943416e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index d81706c8fb..4e7a959012 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:30 -DATE: 2024-10-02_22:32:39 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.762491e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.836111e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.951794e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455722 sec -INFO: No Floating Point Exceptions have been reported - 1,975,760,031 cycles # 2.935 GHz - 2,772,242,722 instructions # 1.40 insn per cycle - 0.730835336 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.550777e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.684021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.686291e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 +TOTAL : 0.467527 sec +INFO: No Floating Point Exceptions have been reported + 1,253,452,808 cycles:u # 2.727 GHz (76.13%) + 2,883,239 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.27%) + 8,880,480 stalled-cycles-backend:u # 0.71% backend cycles idle (73.55%) + 1,677,319,380 instructions:u # 1.34 insn per cycle + # 0.01 stalled cycles per insn (73.26%) + 0.516224704 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.992470e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.540289e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.772038e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.539165 sec -INFO: No Floating Point Exceptions have been reported - 2,324,912,396 cycles # 2.969 GHz - 3,295,857,552 instructions # 
1.42 insn per cycle - 0.840288561 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.999859e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.721066e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.736492e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 +TOTAL : 0.486288 sec +INFO: No Floating Point Exceptions have been reported + 1,266,298,701 cycles:u # 2.496 GHz (75.97%) + 2,417,057 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.65%) + 7,438,043 stalled-cycles-backend:u # 0.59% backend cycles idle (75.86%) + 1,822,767,707 instructions:u # 1.44 insn per cycle + # 0.00 stalled cycles per insn (73.80%) + 0.547651674 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.097209e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.120361e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.120361e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.512296 sec -INFO: No Floating Point Exceptions have been reported - 4,639,671,723 cycles # 3.061 GHz - 13,178,453,080 instructions # 2.84 insn per cycle - 1.516607479 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.463112e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.492510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.492510e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.143575 sec +INFO: No Floating Point Exceptions have been reported + 3,997,523,281 cycles:u # 3.486 GHz (74.99%) + 1,905,658 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.89%) + 516,672,849 stalled-cycles-backend:u # 12.92% backend cycles idle (74.89%) + 13,130,248,081 instructions:u # 3.28 insn per cycle + # 0.04 stalled cycles per insn (74.89%) + 1.151446300 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 706) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.927117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.999096e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.999096e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.868650 sec -INFO: No Floating Point Exceptions have been reported - 2,644,248,242 cycles # 3.032 GHz - 7,473,014,363 instructions # 2.83 insn per cycle - 0.872842396 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.513522e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.600407e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.600407e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.675411 sec +INFO: No Floating Point Exceptions have been reported + 2,363,364,099 cycles:u # 3.484 GHz (74.84%) + 2,086,161 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.24%) + 572,220,288 stalled-cycles-backend:u # 24.21% backend cycles idle (75.24%) + 7,436,302,025 instructions:u # 3.15 insn per cycle + # 0.08 stalled cycles per insn (75.24%) + 0.682583845 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3104) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.309998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.525678e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.525678e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.513914 sec -INFO: No Floating Point Exceptions have been reported - 1,471,858,704 cycles # 2.848 GHz - 3,126,825,800 instructions # 2.12 insn per cycle - 0.518256433 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3133) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.842606e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.180850e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.180850e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.361999 sec +INFO: No Floating Point Exceptions have been reported + 1,257,591,048 cycles:u # 3.446 GHz (73.72%) + 1,838,682 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.43%) + 367,694,710 stalled-cycles-backend:u # 29.24% backend cycles idle (75.52%) + 3,030,416,443 instructions:u # 2.41 insn per cycle + # 0.12 stalled cycles per insn (75.90%) + 0.369270773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3024) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.744395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.024619e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.024619e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.456745 sec -INFO: No Floating Point Exceptions have been reported - 1,318,209,963 cycles # 2.863 GHz - 2,981,428,844 instructions # 2.26 insn per cycle - 0.461015665 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.415670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.528359e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.528359e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.699082 sec -INFO: No Floating Point Exceptions have been reported - 1,360,436,298 cycles # 1.937 GHz - 1,989,825,380 instructions # 1.46 insn per cycle - 0.703247363 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2251) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 4385bdd6af..bd70ad90bb 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +DATE: 2024-10-04_10:29:36 -DATE: 2024-10-02_22:32:53 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.778483e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.885440e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.000351e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455432 sec -INFO: No Floating Point Exceptions have been reported - 1,987,161,261 cycles # 2.956 GHz - 2,799,045,356 instructions # 1.41 insn per cycle - 0.729366827 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 -==PROF== Profiling 
"sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 3.139644e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.654366e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.664699e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 +TOTAL : 0.350320 sec +INFO: No Floating Point Exceptions have been reported + 978,388,626 cycles:u # 2.688 GHz (74.41%) + 2,647,867 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.96%) + 6,328,070 stalled-cycles-backend:u # 0.65% backend cycles idle (74.38%) + 1,504,365,616 instructions:u # 1.54 insn per cycle + # 0.00 stalled cycles per insn (75.62%) + 0.403747700 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.953178e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.419365e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.640921e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.537410 sec -INFO: No Floating Point Exceptions have been reported - 2,307,597,745 cycles # 2.969 GHz - 3,283,930,647 instructions # 1.42 
insn per cycle - 0.834536652 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.175064e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.942863e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.959336e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 +TOTAL : 0.481624 sec +INFO: No Floating Point Exceptions have been reported + 1,269,901,645 cycles:u # 2.528 GHz (75.34%) + 2,461,659 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.34%) + 5,825,646 stalled-cycles-backend:u # 0.46% backend cycles idle (76.47%) + 1,815,054,012 instructions:u # 1.43 insn per cycle + # 0.00 stalled cycles per insn (75.78%) + 0.543801715 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.090474e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.113459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.113459e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.521513 sec -INFO: No Floating Point Exceptions have been reported - 4,642,408,622 cycles # 3.044 GHz - 13,166,526,592 instructions # 2.84 insn per cycle - 1.525661892 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.463537e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.492756e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.492756e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 1.142870 sec +INFO: No Floating Point Exceptions have been reported + 3,995,713,367 cycles:u # 3.487 GHz (74.87%) + 1,908,462 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.87%) + 706,846,408 stalled-cycles-backend:u # 17.69% backend cycles idle (74.87%) + 13,129,808,915 instructions:u # 3.29 insn per cycle + # 0.05 stalled cycles per insn (74.87%) + 1.150382779 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 697) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.922918e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.995508e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.995508e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.870529 sec -INFO: No Floating Point Exceptions have been reported - 2,636,402,305 cycles # 3.016 GHz - 7,475,113,402 instructions # 2.84 insn per cycle - 0.874675780 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.529128e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.617778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.617778e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.671420 sec +INFO: No Floating Point Exceptions have been reported + 2,340,956,935 cycles:u # 3.471 GHz (75.10%) + 1,957,506 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.10%) + 596,609,152 stalled-cycles-backend:u # 25.49% backend cycles idle (75.10%) + 7,452,557,298 instructions:u # 3.18 insn per cycle + # 0.08 stalled cycles per insn (75.10%) + 0.678666189 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.327635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.552954e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.552954e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.510959 sec -INFO: No Floating Point Exceptions have been reported - 1,472,054,188 cycles # 2.861 GHz - 3,127,403,529 instructions # 2.12 insn per cycle - 0.515241692 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3111) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.779457e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.111075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.111075e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 +TOTAL : 0.365983 sec +INFO: No Floating Point Exceptions have been reported + 1,278,426,865 cycles:u # 3.465 GHz (74.21%) + 1,921,745 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.00%) + 293,464,402 stalled-cycles-backend:u # 22.96% backend cycles idle (74.10%) + 3,049,353,775 instructions:u # 2.39 insn per cycle + # 0.10 stalled cycles per insn (74.94%) + 0.373312575 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3002) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.751588e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.026290e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.026290e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 
0.455204 sec -INFO: No Floating Point Exceptions have been reported - 1,320,153,544 cycles # 2.877 GHz - 2,981,574,848 instructions # 2.26 insn per cycle - 0.459378563 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 110) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.424669e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.537772e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.537772e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.696909 sec -INFO: No Floating Point Exceptions have been reported - 1,363,054,761 cycles # 1.945 GHz - 1,990,224,700 instructions # 1.46 insn per cycle - 0.701261631 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2251) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247482643254802 -Relative difference = 5.163537715318965e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 8c3e307fe5..d954d137a8 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:52:12 -DATE: 2024-10-02_23:24:59 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.189379e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.854347e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.468984e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.532180 sec -INFO: No Floating Point Exceptions have been reported - 2,219,216,234 cycles # 2.899 GHz - 3,174,009,870 instructions # 1.43 insn per cycle - 0.825106849 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.548876e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.878752e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.890800e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.428405 sec +INFO: No Floating Point Exceptions have been reported + 1,043,110,697 cycles:u # 2.414 GHz (75.87%) + 2,510,213 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.40%) + 10,356,025 stalled-cycles-backend:u # 0.99% backend cycles idle (75.48%) + 1,549,103,394 instructions:u # 1.49 insn per cycle + # 0.01 stalled cycles per insn (74.97%) + 0.488419891 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.678393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.716890e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.716890e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.356728 sec -INFO: No Floating Point Exceptions have been reported - 19,323,098,467 cycles # 3.038 GHz - 51,924,439,414 instructions # 2.69 insn per cycle - 6.362461259 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.291565e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.341074e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.341074e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.761272 sec +INFO: No Floating Point Exceptions have been reported + 16,413,565,928 cycles:u # 3.439 GHz (74.91%) + 9,168,673 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 2,000,498,527 stalled-cycles-backend:u # 12.19% backend cycles idle (75.02%) + 51,616,234,124 instructions:u # 3.14 insn per cycle + # 0.04 stalled cycles per insn (75.03%) + 4.777715028 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 746) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.021374e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.160318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.160318e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.578860 sec -INFO: No Floating Point Exceptions have been reported - 10,923,994,538 cycles # 3.048 GHz - 30,795,051,014 instructions # 2.82 insn per cycle - 3.584731673 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.901907e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.055059e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.055059e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.870038 sec +INFO: No Floating Point Exceptions have been reported + 9,748,609,775 cycles:u # 3.383 GHz (75.02%) + 9,234,368 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.02%) + 3,080,077,738 stalled-cycles-backend:u # 31.60% backend cycles idle (75.02%) + 30,688,640,376 instructions:u # 3.15 insn per cycle + # 0.10 stalled cycles per insn (75.04%) + 2.886309143 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2833) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.869937e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.224318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.224318e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.261390 sec -INFO: No Floating Point Exceptions have been reported - 6,498,269,514 cycles # 2.867 GHz - 13,665,834,043 instructions # 2.10 insn per cycle - 2.267304210 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.969814e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.446981e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.446981e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.686665 sec +INFO: No Floating Point Exceptions have been reported + 5,601,471,902 cycles:u # 3.298 GHz (75.08%) + 8,267,317 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.04%) + 1,288,835,229 stalled-cycles-backend:u # 23.01% backend cycles idle (75.04%) + 13,373,121,064 instructions:u # 2.39 insn per cycle + # 0.10 stalled cycles per insn (75.04%) + 1.702912365 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2817) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.324016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.747508e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.747508e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.078273 sec -INFO: No Floating Point Exceptions have been reported - 5,947,948,769 cycles # 2.855 GHz - 13,008,169,729 instructions # 2.19 insn per cycle - 2.084199816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.663058e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.855570e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.855570e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.970789 sec -INFO: No Floating Point Exceptions have been reported - 5,847,713,634 cycles # 1.965 GHz - 8,587,473,758 instructions # 1.47 insn per cycle - 2.976683697 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1946) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 70b1342c04..8904cc9c5f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:52:25 -DATE: 2024-10-02_23:25:25 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.145206e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.750029e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.339208e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.539827 sec -INFO: No Floating Point Exceptions have been reported - 2,187,035,010 cycles # 2.816 GHz - 3,118,040,099 instructions # 1.43 insn per cycle - 0.835459641 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== 
Profiling "sigmaKin": launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.647769e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.014068e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.027873e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.409756 sec +INFO: No Floating Point Exceptions have been reported + 1,010,035,303 cycles:u # 2.364 GHz (75.46%) + 2,553,485 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.92%) + 7,861,623 stalled-cycles-backend:u # 0.78% backend cycles idle (75.02%) + 1,595,464,135 instructions:u # 1.58 insn per cycle + # 0.00 stalled cycles per insn (74.81%) + 0.471662306 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110280 -Relative difference = 2.1036162329561614e-07 +Avg ME (F77/GPU) = 4.3134710926110271 +Relative difference = 2.1036162350152416e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.757288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.800092e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.800092e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.078460 sec -INFO: No Floating Point Exceptions have been reported - 18,383,455,963 cycles # 3.022 GHz - 50,054,891,477 instructions # 2.72 insn per cycle - 6.084475174 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.373515e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.427207e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.427207e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.601523 sec +INFO: No Floating Point Exceptions have been reported + 15,839,223,004 cycles:u # 3.433 GHz (74.93%) + 9,992,371 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) + 238,179,835 stalled-cycles-backend:u # 1.50% backend cycles idle (75.03%) + 49,868,612,389 instructions:u # 3.15 insn per cycle + # 0.00 stalled cycles per insn (75.03%) + 4.618020219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.164998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.317783e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.317783e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.422760 sec -INFO: No Floating Point Exceptions have been reported - 10,425,198,156 cycles # 3.042 GHz - 29,176,493,270 instructions # 2.80 insn per cycle - 3.428392442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.062465e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.229594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.229594e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.763609 sec +INFO: No Floating Point Exceptions have been reported + 9,381,584,870 cycles:u # 3.380 GHz (74.92%) + 8,842,331 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) + 2,405,936,514 stalled-cycles-backend:u # 25.65% backend cycles idle (74.94%) + 29,354,889,379 instructions:u # 3.13 insn per cycle + # 0.08 stalled cycles per insn (74.92%) + 2.779716498 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.494730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.797227e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.797227e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.439543 sec -INFO: No Floating Point Exceptions have been reported - 7,004,291,405 cycles # 2.865 GHz - 15,150,544,724 instructions # 2.16 insn per cycle - 2.445416331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3020) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.036601e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.390743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.390743e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.919623 sec +INFO: No Floating Point Exceptions have been reported + 6,461,957,078 cycles:u # 3.345 GHz (74.80%) + 9,210,517 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.76%) + 2,027,388,109 stalled-cycles-backend:u # 31.37% backend cycles idle (74.96%) + 15,191,337,244 instructions:u # 2.35 insn per cycle + # 0.13 stalled cycles per insn (75.15%) + 1.936614466 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3011) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.607457e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.924149e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.924149e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 
2.384173 sec -INFO: No Floating Point Exceptions have been reported - 6,707,006,951 cycles # 2.807 GHz - 14,619,839,876 instructions # 2.18 insn per cycle - 2.390050397 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2621) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.451987e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.626148e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.626148e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.147901 sec -INFO: No Floating Point Exceptions have been reported - 6,045,923,955 cycles # 1.918 GHz - 10,338,625,122 instructions # 1.71 insn per cycle - 3.153821789 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2129) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134710926107935 -Relative difference = 2.103616776553298e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 001e031ae4..e7bcc40711 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:52:38 -DATE: 2024-10-02_23:25:51 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.625139e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.523370e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.621120e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.487063 sec +EvtsPerSec[Rmb+ME] (23) = ( 2.943181e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.870847e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.897072e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 +TOTAL : 0.352477 sec INFO: No Floating Point Exceptions have been reported - 2,103,765,597 cycles # 2.940 GHz - 
3,010,989,522 instructions # 1.43 insn per cycle - 0.772591402 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 876,247,972 cycles:u # 2.390 GHz (76.01%) + 2,497,336 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.03%) + 7,944,281 stalled-cycles-backend:u # 0.91% backend cycles idle (75.58%) + 1,498,533,832 instructions:u # 1.71 insn per cycle + # 0.01 stalled cycles per insn (76.36%) + 0.408456697 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525361867622 +Relative difference = 6.615515935930387e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.742643e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.785190e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.785190e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.103332 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,609,905,827 cycles # 3.047 GHz - 51,215,063,345 instructions # 2.75 insn per cycle - 6.108967968 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.542954e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.605348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.605348e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.270864 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 14,749,563,788 cycles:u # 3.447 GHz (74.95%) + 17,056,430 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.87%) + 2,639,824,270 stalled-cycles-backend:u # 17.90% backend cycles idle (74.90%) + 51,559,248,161 instructions:u # 3.50 insn per cycle + # 0.05 stalled cycles per insn (75.08%) + 4.282924101 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,33 +86,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 +Avg ME (F77/C++) = 4.3135737704578787 +Relative difference = 5.321390598852464e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.182136e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.464848e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.464848e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.593631 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,948,906,401 cycles # 3.059 GHz - 19,317,685,979 instructions # 2.43 insn per cycle - 2.599267681 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.744129e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.077428e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.077428e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 1.971346 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,687,741,511 cycles:u # 3.379 GHz (74.94%) + 11,735,458 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.94%) + 2,605,727,205 stalled-cycles-backend:u # 38.96% backend cycles idle (74.94%) + 18,683,455,679 instructions:u # 2.79 insn per cycle + # 0.14 stalled cycles per insn (74.97%) + 1.983304528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3319) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,33 +123,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733226081356 +Relative difference = 7.478907526568244e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.171182e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.241251e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.241251e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.368181 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,965,754,508 cycles # 2.888 GHz - 8,832,724,394 instructions # 2.23 insn per cycle - 1.373877553 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.129095e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.256231e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.256231e+06 ) sec^-1 +MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.074921 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,554,292,535 cycles:u # 3.282 GHz (74.93%) + 6,570,022 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.89%) + 1,124,556,780 stalled-cycles-backend:u # 31.64% backend cycles idle (74.89%) + 8,625,582,750 instructions:u # 2.43 insn per cycle + # 0.13 stalled cycles per insn (74.90%) + 1.087177668 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3600) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,78 +160,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 +Avg ME (F77/C++) = 4.3135650658514351 +Relative difference = 1.526612799754012e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW 
-Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.610704e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.814571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.814571e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.302060 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,747,639,043 cycles # 2.867 GHz - 8,431,545,053 instructions # 2.25 insn per cycle - 1.307700074 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3541) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.347091e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.938350e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.938350e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.737189 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,508,553,237 cycles # 2.014 GHz - 
6,243,454,205 instructions # 1.78 insn per cycle - 1.742932448 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2325) (512y: 22) (512z: 2290) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index 
07d75bc161..f3beef6e21 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:52:49 -DATE: 2024-10-02_23:26:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.885122e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.628871e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.741563e+08 ) sec^-1 -MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.487946 sec +EvtsPerSec[Rmb+ME] (23) = ( 3.293817e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.590857e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.628069e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 +TOTAL : 0.356321 sec INFO: No Floating Point Exceptions have been reported - 2,087,121,908 cycles # 2.910 GHz - 3,019,371,370 instructions # 1.45 insn per cycle - 0.773659070 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 853,436,101 cycles:u # 2.315 GHz (73.39%) + 2,358,095 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.58%) + 12,115,732 stalled-cycles-backend:u # 1.42% backend cycles idle (74.46%) + 1,575,446,030 instructions:u # 1.85 insn per cycle + # 0.01 stalled cycles per insn (73.75%) + 0.414967357 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 4.313490e+00 -Avg ME (F77/GPU) = 4.3136695491848513 -Relative difference = 4.162503792787837e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 4.313524e+00 +Avg ME (F77/GPU) = 4.3135525361867622 +Relative difference = 6.615515935930387e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.770821e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.815512e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.815512e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.006875 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,030,716,622 cycles # 2.999 GHz - 49,602,013,092 instructions # 2.75 insn per cycle - 6.012632180 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.718331e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.788416e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.788416e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 4.005246 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 13,835,421,219 cycles:u # 3.448 GHz (74.90%) + 17,069,198 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.96%) + 357,948,952 stalled-cycles-backend:u # 2.59% backend cycles idle (75.06%) + 49,471,917,423 instructions:u # 3.58 insn per cycle + # 0.01 stalled cycles per insn (75.08%) + 4.017265807 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 614) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,33 +86,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135738277342170 -Relative difference = 3.9935743068669333e-08 +Avg ME (F77/C++) = 4.3135737704578787 +Relative difference = 5.321390598852464e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.661063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.005931e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.005931e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.335528 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,124,417,342 cycles # 3.044 GHz - 18,533,238,890 instructions # 2.60 insn per cycle - 2.341180166 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.816066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.284665e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.284665e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 +TOTAL : 1.684957 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,703,146,096 cycles:u # 3.370 GHz (74.86%) + 12,386,880 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.95%) + 1,730,264,085 stalled-cycles-backend:u # 30.34% backend cycles idle (74.95%) + 18,193,557,266 instructions:u # 3.19 insn per cycle + # 0.10 stalled cycles per insn (74.96%) + 1.696865901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3078) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,33 +123,36 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313572e+00 -Avg ME (F77/C++) = 4.3135722697479650 -Relative difference = 6.253470796314402e-08 +Avg ME (C++/C++) = 4.313573e+00 +Avg ME (F77/C++) = 4.3135733226081356 +Relative difference = 7.478907526568244e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.555350e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.026882e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.026882e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.973614 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,639,444,254 cycles # 2.850 GHz - 10,848,081,116 instructions # 1.92 insn per cycle - 1.979248695 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.399416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.080995e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.080995e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 +TOTAL : 1.394508 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,683,415,974 cycles:u # 3.340 GHz (74.94%) + 7,993,120 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.90%) + 1,817,506,238 stalled-cycles-backend:u # 38.81% backend cycles idle (74.90%) + 10,765,447,899 instructions:u # 2.30 insn per cycle + # 0.17 stalled cycles per insn (74.90%) + 1.406990317 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4259) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,82 +160,18 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 +Avg ME (F77/C++) = 4.3135650658514351 +Relative difference = 1.526612799754012e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.687423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.182059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.182059e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.928080 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,565,851,949 cycles # 2.880 GHz - 10,551,069,876 instructions # 1.90 insn per cycle - 1.933684179 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4138) (512y: 12) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135645242873579 -Relative difference = 1.1028294269894893e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.666673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.977886e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.977886e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.332019 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,637,687,168 cycles # 1.985 GHz - 8,659,128,272 instructions # 1.87 insn per cycle - 2.337748946 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313564e+00 -Avg ME (F77/C++) = 4.3135643536224961 -Relative difference = 8.197919301304478e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 17ba5d04ac..3651a68d0f 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:52:59 -DATE: 2024-10-02_23:26:35 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.145183e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.832777e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.435037e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.531018 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.549341e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.895244e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907493e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.412135 sec INFO: No Floating Point Exceptions have been reported - 2,261,745,252 cycles # 2.959 GHz - 3,218,464,294 instructions # 1.42 insn per cycle - 0.823443286 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,012,754,443 cycles:u # 2.356 GHz (76.22%) + 2,315,059 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.85%) + 7,225,280 stalled-cycles-backend:u # 0.71% backend cycles idle (74.91%) + 1,673,431,305 instructions:u # 1.65 insn per cycle + # 0.00 stalled cycles per insn (73.79%) + 0.475036466 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.569215e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.602822e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.602822e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.791642 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,563,959,508 cycles # 3.026 GHz - 51,925,698,785 instructions # 2.53 insn per cycle - 6.797429254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.270757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.319460e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.319460e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.802290 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 16,546,095,737 cycles:u # 3.437 GHz (74.93%) + 31,931,509 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.91%) + 2,162,110,524 stalled-cycles-backend:u # 13.07% backend cycles idle (74.96%) + 51,706,306,670 instructions:u # 3.12 insn per cycle + # 0.04 stalled cycles per insn (75.04%) + 4.818986615 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +86,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +95,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.866433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.990571e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.990571e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.767439 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,513,295,665 cycles # 3.052 GHz - 30,592,567,538 instructions # 2.66 insn per cycle - 3.773601304 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.890360e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.044955e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.044955e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.876779 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 9,813,550,759 cycles:u # 3.397 GHz (74.86%) + 14,935,911 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.00%) + 3,058,985,000 stalled-cycles-backend:u # 31.17% backend cycles idle (75.08%) + 30,515,940,191 instructions:u # 3.11 insn per cycle + # 0.10 stalled cycles per insn (75.08%) + 2.893065928 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2927) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +123,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +132,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.729775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.061750e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.061750e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.323879 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,711,394,456 cycles # 2.882 GHz - 13,608,749,696 instructions # 2.03 insn per cycle - 2.329702373 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.151810e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.659007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.659007e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.647064 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,474,101,790 cycles:u # 3.300 GHz (74.93%) + 12,312,874 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.93%) + 1,329,742,792 stalled-cycles-backend:u # 24.29% backend cycles idle (74.96%) + 13,319,370,462 instructions:u # 2.43 insn per cycle + # 0.10 stalled cycles per insn (74.96%) + 1.663227013 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3019) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +160,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,73 +169,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = 
MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.169662e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.568966e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.568966e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.135490 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,181,875,885 cycles # 2.888 GHz - 12,975,632,555 instructions # 2.10 insn per cycle - 2.141464236 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 
1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.298256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.453472e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.453472e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.288067 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,406,126,196 cycles # 1.946 GHz - 8,701,338,330 instructions # 1.36 insn per cycle - 3.294025783 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2014) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 2ae9588cbc..100ace0fa7 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +DATE: 2024-10-04_11:53:13 -DATE: 2024-10-02_23:27:02 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.150402e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.856906e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.454476e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.526172 sec +EvtsPerSec[Rmb+ME] (23) = ( 1.640738e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.025699e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.039692e+07 ) sec^-1 +MeanMatrixElemValue = ( 
7.088120e+00 +- 1.629041e-01 ) GeV^0 +TOTAL : 0.408456 sec INFO: No Floating Point Exceptions have been reported - 2,295,452,706 cycles # 2.993 GHz - 3,307,765,060 instructions # 1.44 insn per cycle - 0.824169356 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% + 1,044,567,514 cycles:u # 2.447 GHz (76.45%) + 2,397,919 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.70%) + 5,774,628 stalled-cycles-backend:u # 0.55% backend cycles idle (73.30%) + 1,595,969,251 instructions:u # 1.53 insn per cycle + # 0.00 stalled cycles per insn (73.73%) + 0.471349867 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.671429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.710309e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.710309e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.383632 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 19,535,242,963 cycles # 3.058 GHz - 49,954,649,142 instructions # 2.56 insn per cycle - 6.389286053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.410893e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.467452e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.467452e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 4.532366 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 15,572,968,721 cycles:u # 3.427 GHz (75.01%) + 31,406,791 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.00%) + 49,131,449 stalled-cycles-backend:u # 0.32% backend cycles idle (75.01%) + 49,902,625,148 instructions:u # 3.20 insn per cycle + # 0.00 stalled cycles per insn (75.01%) + 4.549108797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 652) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +86,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +95,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
-Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.974616e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.107062e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.107062e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.633598 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,048,626,108 cycles # 3.037 GHz - 29,139,783,516 instructions # 2.64 insn per cycle - 3.639341681 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.990717e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.154860e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.154860e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 2.810554 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 9,549,397,093 cycles:u # 3.383 GHz (74.95%) + 15,707,127 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.06%) + 1,948,445,300 stalled-cycles-backend:u # 20.40% backend cycles idle (75.06%) + 28,971,717,461 instructions:u # 3.03 insn per cycle + # 0.07 stalled cycles per insn (75.06%) + 2.827859944 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2723) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +123,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -147,24 +132,27 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.862780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.086642e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.086642e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.826812 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 8,110,327,392 cycles # 2.866 GHz - 15,189,804,265 instructions # 1.87 insn per cycle - 2.832751384 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3203) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.896446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.233456e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.233456e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 +TOTAL : 1.961732 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,573,075,125 cycles:u # 3.330 GHz (74.88%) + 18,540,499 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.91%) + 2,224,016,527 stalled-cycles-backend:u # 33.84% backend cycles idle (74.90%) + 15,037,369,471 instructions:u # 2.29 insn per cycle + # 0.15 stalled cycles per insn (74.88%) + 1.978117739 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +160,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -181,73 +169,9 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = 
MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.093395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.337729e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.337729e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.668875 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 7,696,372,242 cycles # 2.878 GHz - 14,484,401,690 instructions # 1.88 insn per cycle - 2.674814198 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 304) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 
1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.225341e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.377311e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.377311e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.360677 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,555,331,117 cycles # 1.948 GHz - 9,892,801,123 instructions # 1.51 insn per cycle - 3.366641015 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2216) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313472e+00 -Avg ME (F77/C++) = 4.3134712319139954 -Relative difference = 1.7806676491157786e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 31ad35f4d6..a827ba6b8b 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= 
FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:51:39 -DATE: 2024-10-02_23:23:54 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.769640e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.787416e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
2.790414e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.468036 sec -INFO: No Floating Point Exceptions have been reported - 2,037,551,034 cycles # 2.955 GHz - 2,992,853,394 instructions # 1.47 insn per cycle - 0.746736203 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.582456e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.122819e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.124654e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.432968 sec +INFO: No Floating Point Exceptions have been reported + 1,090,146,119 cycles:u # 2.686 GHz (75.76%) + 2,302,256 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.88%) + 6,356,529 stalled-cycles-backend:u # 0.58% backend cycles idle (76.51%) + 1,570,621,288 instructions:u # 1.44 insn per cycle + # 0.00 stalled cycles per insn (76.59%) + 0.488099481 seconds time elapsed ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.955252e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.072819e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.081098e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.486910 sec -INFO: No Floating Point Exceptions have been reported - 2,053,456,592 cycles # 2.899 GHz - 3,023,614,282 instructions # 1.47 insn per cycle - 0.768139647 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.109493e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.286013e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.286503e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.420410 sec +INFO: No Floating Point Exceptions have been reported + 1,200,943,623 cycles:u # 2.786 GHz (74.71%) + 2,519,175 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.61%) + 6,893,773 stalled-cycles-backend:u # 0.57% backend cycles idle (75.00%) + 1,703,958,868 instructions:u # 1.42 insn per cycle + # 0.00 stalled cycles per insn (75.05%) + 0.468305523 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.556594e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.560204e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.560204e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.150865 sec -INFO: No Floating Point Exceptions have been reported - 468,041,301 cycles # 3.038 GHz - 1,389,874,591 instructions # 2.97 insn per cycle - 0.154561545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.139544e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.144887e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.144887e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.104786 sec +INFO: No Floating Point Exceptions have been reported + 371,842,346 cycles:u # 3.460 GHz (72.72%) + 29,514 stalled-cycles-frontend:u # 0.01% frontend cycles idle 
(70.57%) + 40,691,461 stalled-cycles-backend:u # 10.94% backend cycles idle (71.98%) + 1,347,611,870 instructions:u # 3.62 insn per cycle + # 0.03 stalled cycles per insn (75.70%) + 0.111859915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative 
difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.755475e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.769207e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.769207e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080074 sec -INFO: No Floating Point Exceptions have been reported - 240,347,702 cycles # 2.886 GHz - 693,020,093 instructions # 2.88 insn per cycle - 0.083834683 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.003049e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005201e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005201e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.054728 sec +INFO: No Floating Point Exceptions have been reported + 192,768,732 cycles:u # 3.365 GHz (73.34%) + 31,385 stalled-cycles-frontend:u # 0.02% frontend cycles idle (72.15%) + 21,025,011 stalled-cycles-backend:u # 10.91% 
backend cycles idle (72.15%) + 662,523,571 instructions:u # 3.44 insn per cycle + # 0.03 stalled cycles per insn (72.15%) + 0.061486153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8749) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference 
<= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.470546e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.476392e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.476392e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037947 sec -INFO: No Floating Point Exceptions have been reported - 113,951,288 cycles # 2.767 GHz - 257,914,170 instructions # 2.26 insn per cycle - 0.041775140 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8501) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.073377e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.082771e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.082771e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.027370 sec +INFO: No Floating Point Exceptions have been reported + 90,359,469 cycles:u # 3.007 GHz (73.90%) + 75,752 stalled-cycles-frontend:u # 0.08% frontend cycles idle (73.52%) + 11,570,982 stalled-cycles-backend:u # 12.81% backend cycles idle (73.52%) + 233,290,158 instructions:u # 
2.58 insn per cycle + # 0.05 stalled cycles per insn (73.52%) + 0.034043713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7869) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.587475e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.594909e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.594909e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.035255 sec -INFO: No Floating Point Exceptions have been reported - 102,623,828 cycles # 2.666 GHz - 240,025,776 instructions # 2.34 insn per cycle - 0.039073005 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8143) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.268803e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.274169e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.274169e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 -TOTAL : 0.043872 sec -INFO: No Floating Point Exceptions have been reported - 90,257,947 cycles # 1.910 GHz - 134,303,865 instructions # 1.49 insn per cycle - 0.047785620 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7086) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index 520fc6d267..e9d19cd062 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:51:45 -DATE: 2024-10-02_23:24:05 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.800320e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.818517e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.821599e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470187 sec -INFO: No Floating Point Exceptions have been reported - 2,052,814,472 cycles # 2.969 GHz - 2,949,612,457 instructions # 1.44 insn per cycle - 0.750557916 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.854502e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.456024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.457831e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.389807 sec +INFO: No Floating Point Exceptions have been reported + 1,103,828,674 cycles:u # 2.767 GHz (74.97%) + 2,466,335 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.13%) + 5,284,848 stalled-cycles-backend:u # 0.48% backend cycles idle (75.35%) + 1,547,668,644 instructions:u # 1.40 insn per cycle + # 0.00 stalled cycles per insn (75.31%) + 0.444109511 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.127619e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.255846e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.264216e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483932 sec -INFO: No Floating Point Exceptions have been reported - 2,088,813,579 cycles # 2.962 GHz - 3,090,582,596 instructions # 1.48 insn per 
cycle - 0.765249817 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.131911e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.312774e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.313271e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.418749 sec +INFO: No Floating Point Exceptions have been reported + 1,170,737,076 cycles:u # 2.725 GHz (76.02%) + 2,504,950 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.22%) + 5,156,359 stalled-cycles-backend:u # 0.44% backend cycles idle (73.53%) + 1,648,323,219 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (73.44%) + 0.472468324 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176604E-006 -Relative difference = 3.3392753366481633e-07 +Avg ME (F77/GPU) = 8.1274562860176587E-006 +Relative difference = 3.3392753387325367e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.583197e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.586632e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.586632e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.148844 sec -INFO: No Floating Point Exceptions have been reported - 465,656,480 cycles # 3.065 GHz - 1,385,063,684 instructions # 2.97 insn per cycle - 0.152528488 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.152238e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.158063e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.158063e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.104212 sec +INFO: No Floating Point Exceptions have been reported + 369,886,605 cycles:u # 3.461 GHz (69.04%) + 34,788 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.29%) + 43,914,996 stalled-cycles-backend:u # 11.87% backend cycles idle (76.03%) + 1,330,155,157 instructions:u # 3.60 insn per cycle + # 0.03 stalled cycles per insn (77.58%) + 0.111616153 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1597) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.701779e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.714329e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.714329e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.079935 sec -INFO: No Floating Point Exceptions have been reported - 238,338,142 cycles # 2.869 GHz - 689,077,380 instructions # 2.89 insn per cycle - 0.083658919 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.908546e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.928912e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.928912e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.054799 sec +INFO: No Floating Point Exceptions have been reported + 192,144,173 cycles:u # 3.348 GHz (73.26%) + 27,034 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.20%) + 19,808,579 stalled-cycles-backend:u # 10.31% backend cycles idle (72.19%) + 659,238,962 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (72.19%) + 0.062052454 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8794) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.516138e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.522347e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.522347e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.036146 sec -INFO: No Floating Point Exceptions have been reported - 111,533,372 cycles # 2.836 GHz - 253,485,212 instructions # 2.27 insn per cycle - 0.039854413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8457) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.137277e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.146846e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146846e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.026043 sec +INFO: No Floating Point Exceptions have been reported + 86,694,030 cycles:u # 3.023 GHz (72.65%) + 23,484 stalled-cycles-frontend:u # 0.03% frontend cycles idle (72.26%) + 8,835,392 stalled-cycles-backend:u # 10.19% backend cycles idle (72.26%) + 231,252,295 instructions:u # 2.67 insn per cycle + # 0.04 stalled cycles per insn (72.26%) + 0.032914407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7839) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.619024e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.626212e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.626212e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033802 sec -INFO: No Floating Point Exceptions have been reported - 100,180,790 cycles # 2.704 GHz - 235,622,302 instructions # 2.35 insn per cycle - 0.037533375 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8101) (512y: 150) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.260779e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.266519e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.266519e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 -TOTAL : 0.043311 sec -INFO: No Floating Point Exceptions have been reported - 88,103,069 cycles # 1.888 GHz - 129,731,242 instructions # 1.47 insn per cycle - 0.047213046 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7084) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274562860174791E-006 -Relative difference = 3.3392755596761116e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 5ff76d67ba..8c49ada640 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:51:50 -DATE: 2024-10-02_23:24:16 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.211219e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.220457e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.222410e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.474415 sec -INFO: No Floating Point Exceptions have been reported - 2,042,215,104 cycles # 2.959 GHz - 2,967,666,575 instructions # 1.45 insn per cycle - 0.749013771 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.132723e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.300853e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.301412e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 +TOTAL : 0.360615 sec +INFO: No Floating Point Exceptions have been reported + 1,000,731,193 cycles:u # 2.709 GHz (74.29%) + 2,424,404 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.12%) + 5,293,783 stalled-cycles-backend:u # 0.53% backend cycles idle (76.52%) + 1,461,513,671 instructions:u # 1.46 insn per cycle + # 0.00 stalled cycles per insn (76.19%) + 0.408059445 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.889452e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.983579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.991978e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.474378 sec -INFO: No Floating Point Exceptions have been reported - 2,044,733,349 cycles # 2.963 GHz - 2,989,289,340 instructions # 1.46 insn per 
cycle - 0.749063185 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.806295e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.371717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.373128e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 +TOTAL : 0.380381 sec +INFO: No Floating Point Exceptions have been reported + 1,039,678,236 cycles:u # 2.656 GHz (75.58%) + 2,424,013 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.54%) + 6,246,846 stalled-cycles-backend:u # 0.60% backend cycles idle (76.28%) + 1,582,406,209 instructions:u # 1.52 insn per cycle + # 0.00 stalled cycles per insn (76.05%) + 0.431428242 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272869669930272E-006 -Relative difference = 4.548524165778887e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127375e-06 +Avg ME (F77/GPU) = 8.1275160277913510E-006 +Relative difference = 1.735219444797551e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 
2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.559321e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.562914e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.562914e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.150601 sec -INFO: No Floating Point Exceptions have been reported - 464,247,537 cycles # 3.020 GHz - 1,382,106,488 instructions # 2.98 insn per cycle - 0.154369193 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.299642e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.305836e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.305836e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.101767 sec +INFO: No Floating Point Exceptions have been reported + 361,295,005 cycles:u # 3.459 GHz (72.61%) + 24,284 stalled-cycles-frontend:u # 0.01% frontend cycles idle (70.61%) + 45,702,302 stalled-cycles-backend:u # 12.65% backend cycles idle (73.41%) + 1,324,193,787 instructions:u # 3.67 insn per cycle + # 0.03 stalled cycles per insn (77.06%) + 0.109508489 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1635) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278101435899343E-006 +Relative difference = 1.76664974860306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.252858e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.257505e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.257505e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.044283 sec -INFO: No Floating Point Exceptions have been reported - 132,985,054 cycles # 2.803 GHz - 372,125,739 instructions # 2.80 insn per cycle - 0.048041967 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.869110e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877448e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877448e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.030178 sec +INFO: No Floating Point Exceptions have been reported + 101,110,114 cycles:u # 3.084 GHz (76.07%) + 26,656 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.72%) + 14,337,366 stalled-cycles-backend:u # 14.18% backend cycles idle (75.72%) + 343,617,527 instructions:u # 3.40 insn per cycle + # 0.04 stalled cycles per insn (75.72%) + 0.037469160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071402353976E-006 +Relative difference = 1.725378052944308e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.855200e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.879676e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.879676e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020512 sec -INFO: No Floating Point Exceptions have been reported - 65,226,143 cycles # 2.754 GHz - 142,813,798 instructions # 2.19 insn per cycle - 0.024211039 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9241) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.107990e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.147875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.147875e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.014681 sec +INFO: No Floating Point Exceptions have been reported + 54,299,137 cycles:u # 3.141 GHz (64.92%) + 14,620 stalled-cycles-frontend:u # 0.03% frontend cycles idle (54.79%) + 3,861,986 stalled-cycles-backend:u # 7.11% backend cycles idle (54.13%) + 123,494,904 instructions:u # 2.27 insn per cycle + # 0.03 stalled cycles per insn (60.44%) + 0.021288975 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8628) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275351122593251E-006 +Relative difference = 1.3812222848044195e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.108853e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.137651e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.137651e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.019016 sec -INFO: No Floating Point Exceptions have been reported - 61,573,217 cycles # 2.773 GHz - 132,819,685 instructions # 2.16 insn per cycle - 0.022685850 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8959) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.385538e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.406562e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.406562e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 -TOTAL : 0.024358 sec -INFO: No Floating Point Exceptions have been reported - 53,055,109 cycles # 1.895 GHz - 79,577,124 instructions # 1.50 insn per cycle - 0.028648864 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2836) (512y: 30) (512z: 7437) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 662cc2f451..65e785a100 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:51:55 -DATE: 2024-10-02_23:24:27 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.235162e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.244960e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.246839e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.477065 sec -INFO: No Floating Point Exceptions have been reported - 2,025,818,805 cycles # 2.919 GHz - 2,939,784,013 instructions # 1.45 insn per cycle - 0.752407839 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.143836e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.307334e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.307896e+04 ) sec^-1 +MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 +TOTAL : 0.380148 sec +INFO: No Floating Point Exceptions have been reported + 1,009,847,531 cycles:u # 2.736 GHz (72.73%) + 2,537,228 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.78%) + 5,245,061 stalled-cycles-backend:u # 0.52% backend cycles idle (76.61%) + 1,458,641,375 instructions:u # 1.44 insn per cycle + # 0.00 stalled cycles per insn (76.62%) + 0.431707059 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.112799e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.201470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.209428e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.472481 sec -INFO: No Floating Point Exceptions have been reported - 2,041,894,086 cycles # 2.955 GHz - 2,946,838,758 instructions # 1.44 insn per 
cycle - 0.748409052 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.816620e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.417499e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.418878e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 +TOTAL : 0.380664 sec +INFO: No Floating Point Exceptions have been reported + 1,061,615,308 cycles:u # 2.719 GHz (73.75%) + 2,499,819 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.06%) + 6,189,233 stalled-cycles-backend:u # 0.58% backend cycles idle (75.34%) + 1,572,709,866 instructions:u # 1.48 insn per cycle + # 0.00 stalled cycles per insn (75.31%) + 0.428586389 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 8.127250e-06 -Avg ME (F77/GPU) = 8.1272866419447706E-006 -Relative difference = 4.508529302013153e-06 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 8.127375e-06 +Avg ME (F77/GPU) = 8.1275164883853706E-006 +Relative difference = 1.740886637704508e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 
2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.524192e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.527540e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.527540e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.151291 sec -INFO: No Floating Point Exceptions have been reported - 467,037,767 cycles # 3.023 GHz - 1,376,809,181 instructions # 2.95 insn per cycle - 0.154965126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.297453e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.303599e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.303599e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 +TOTAL : 0.101240 sec +INFO: No Floating Point Exceptions have been reported + 352,808,274 cycles:u # 3.397 GHz (72.95%) + 29,031 stalled-cycles-frontend:u # 0.01% frontend cycles idle (76.41%) + 40,275,340 stalled-cycles-backend:u # 11.42% backend cycles idle (76.92%) + 1,323,447,323 instructions:u # 3.75 insn per cycle + # 0.03 stalled cycles per insn (76.92%) + 0.108960973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1608) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127811e-06 -Avg ME (F77/C++) = 8.1278105271212486E-006 -Relative difference = 5.8180333155894157e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127810e-06 +Avg ME (F77/C++) = 8.1278101435899343E-006 +Relative difference = 1.76664974860306e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.250589e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.254973e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.254973e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.043394 sec -INFO: No Floating Point Exceptions have been reported - 130,510,666 cycles # 2.799 GHz - 367,293,969 instructions # 2.81 insn per cycle - 0.047185544 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.906770e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914671e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914671e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 +TOTAL : 0.029000 sec +INFO: No Floating Point Exceptions have been reported + 98,322,942 cycles:u # 3.114 GHz (75.15%) + 21,620 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.79%) + 14,757,264 stalled-cycles-backend:u # 15.01% backend cycles idle (74.79%) + 343,482,210 instructions:u # 3.49 insn per cycle + # 0.04 stalled cycles per insn (74.79%) + 0.036350431 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9253) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127809e-06 -Avg ME (F77/C++) = 8.1278090510674588E-006 -Relative difference = 6.2830535070193674e-09 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127807e-06 +Avg ME (F77/C++) = 8.1278071402353976E-006 +Relative difference = 1.725378052944308e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.895966e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.919717e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.919717e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.019526 sec -INFO: No Floating Point Exceptions have been reported - 63,088,654 cycles # 2.773 GHz - 138,078,009 instructions # 2.19 insn per cycle - 0.023227465 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9196) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.090520e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.126703e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126703e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 +TOTAL : 0.014211 sec +INFO: No Floating Point Exceptions have been reported + 52,537,622 cycles:u # 3.131 GHz (64.84%) + 12,697 stalled-cycles-frontend:u # 0.02% frontend cycles idle (53.43%) + 4,440,172 stalled-cycles-backend:u # 8.45% backend cycles idle (52.77%) + 122,003,851 instructions:u # 2.32 insn per cycle + # 0.04 stalled cycles per insn (61.17%) + 0.021303380 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8595) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127535e-06 +Avg ME (F77/C++) = 8.1275351122593251E-006 +Relative difference = 1.3812222848044195e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.167323e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.196847e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.196847e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.017922 sec -INFO: No Floating Point Exceptions have been reported - 58,004,801 cycles # 2.745 GHz - 127,991,431 instructions # 2.21 insn per cycle - 0.021624106 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8910) (512y: 28) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275366216540664E-006 -Relative difference = 4.655111786058001e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.372680e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.393901e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.393901e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 -TOTAL : 0.023632 sec -INFO: No Floating Point Exceptions have been reported - 50,117,827 cycles # 1.863 GHz - 74,764,014 instructions # 1.49 insn per cycle - 0.027462672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2791) (512y: 30) (512z: 7439) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127537e-06 -Avg ME (F77/C++) = 8.1275369863475849E-006 -Relative difference = 1.6797726498700304e-09 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 2860254d4c..c74dc823ad 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:52:00 -DATE: 2024-10-02_23:24:37 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.738978e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.756587e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.759630e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.470308 sec -INFO: No Floating Point Exceptions have been reported - 2,029,517,703 cycles # 2.933 GHz - 2,946,537,029 instructions # 1.45 insn per cycle - 0.750454094 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.619421e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.149565e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.151432e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.416231 sec +INFO: No Floating Point Exceptions have been reported + 1,112,118,022 cycles:u # 2.751 GHz (75.68%) + 2,293,715 stalled-cycles-frontend:u # 0.21% frontend cycles idle (76.54%) + 11,422,495 stalled-cycles-backend:u # 1.03% backend cycles idle (76.29%) + 1,549,747,180 instructions:u # 1.39 insn per cycle + # 0.01 stalled cycles per insn (76.39%) + 0.464311808 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.975249e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.105448e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.114521e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.483108 sec -INFO: No Floating Point Exceptions have been reported - 2,093,310,274 cycles # 2.962 GHz - 3,111,318,214 instructions # 1.49 insn per 
cycle - 0.763440898 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.124077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.299484e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.300004e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.441718 sec +INFO: No Floating Point Exceptions have been reported + 1,258,352,274 cycles:u # 2.790 GHz (74.97%) + 2,486,955 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.14%) + 5,885,059 stalled-cycles-backend:u # 0.47% backend cycles idle (74.68%) + 1,742,144,024 instructions:u # 1.38 insn per cycle + # 0.00 stalled cycles per insn (74.17%) + 0.491667431 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.479369e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.482863e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.482863e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.153894 sec -INFO: No Floating Point Exceptions have been reported - 471,996,695 cycles # 3.005 GHz - 1,398,458,325 instructions # 2.96 insn per cycle - 0.157639380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.199069e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.205010e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.205010e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.103842 sec +INFO: No Floating Point Exceptions have been reported + 364,350,368 cycles:u # 3.422 GHz (69.40%) + 39,775 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.04%) + 34,532,847 stalled-cycles-backend:u # 9.48% backend cycles idle (77.50%) + 1,339,623,327 instructions:u # 3.68 insn per cycle + # 0.03 stalled cycles per insn (77.49%) + 0.110807545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.817579e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.830221e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.830221e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.079435 sec -INFO: No Floating Point Exceptions have been reported - 237,264,825 cycles # 2.877 GHz - 688,242,182 instructions # 2.90 insn per cycle - 0.083121228 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.012681e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.015094e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.015094e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.054171 sec +INFO: No Floating Point Exceptions have been reported + 192,108,317 cycles:u # 3.383 GHz (73.86%) + 28,861 stalled-cycles-frontend:u # 0.02% frontend cycles idle (71.91%) + 22,278,155 stalled-cycles-backend:u # 11.60% backend cycles idle (71.90%) + 658,320,230 instructions:u # 3.43 insn per cycle + # 0.03 stalled cycles per insn (71.90%) + 0.061213813 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8728) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.469077e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.475276e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.475276e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.038002 sec -INFO: No Floating Point Exceptions have been reported - 113,713,809 cycles # 2.755 GHz - 253,123,745 instructions # 2.23 insn per cycle - 0.041850302 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.136754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.146316e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146316e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.026658 sec +INFO: No Floating Point Exceptions have been reported + 97,797,679 cycles:u # 3.344 GHz (68.20%) + 28,245 stalled-cycles-frontend:u # 0.03% frontend cycles idle (72.80%) + 10,553,550 stalled-cycles-backend:u # 10.79% backend cycles idle (72.79%) + 229,743,009 instructions:u # 2.35 insn per cycle + # 0.05 stalled cycles per insn (72.79%) + 0.033574814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7892) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.615978e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.623720e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.623720e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034706 sec -INFO: No Floating Point Exceptions have been reported - 101,196,884 cycles # 2.667 GHz - 233,657,279 instructions # 2.31 insn per cycle - 0.038483246 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.233700e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.238685e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.238685e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 -TOTAL : 0.045046 sec -INFO: No Floating Point Exceptions have been reported - 91,035,012 cycles # 1.880 GHz - 133,158,052 instructions # 1.46 insn per cycle - 0.048995485 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6354) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 91c8760286..eaf646f1b2 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,83 +1,67 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +DATE: 2024-10-04_11:52:06 -DATE: 2024-10-02_23:24:48 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.782094e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.800671e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.804051e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.471374 sec -INFO: No Floating Point Exceptions have been reported - 2,059,228,408 cycles # 2.969 GHz - 2,976,693,819 instructions # 1.45 insn per cycle - 0.751857693 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 -==PROF== Profiling "sigmaKin": 
sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.891944e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.503892e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.505792e+03 ) sec^-1 +MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 +TOTAL : 0.389977 sec +INFO: No Floating Point Exceptions have been reported + 1,080,122,392 cycles:u # 2.704 GHz (75.45%) + 2,305,877 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.49%) + 6,693,024 stalled-cycles-backend:u # 0.62% backend cycles idle (76.00%) + 1,564,769,595 instructions:u # 1.45 insn per cycle + # 0.00 stalled cycles per insn (76.45%) + 0.445401382 seconds time elapsed ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.066550e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.182190e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.190564e+05 ) sec^-1 -MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485261 sec -INFO: No Floating Point Exceptions have been reported - 2,087,825,759 cycles # 2.964 GHz - 3,088,551,405 instructions # 1.48 insn per 
cycle - 0.765530482 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.144095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.329788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.330291e+05 ) sec^-1 +MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 +TOTAL : 0.421263 sec +INFO: No Floating Point Exceptions have been reported + 1,206,970,978 cycles:u # 2.804 GHz (74.04%) + 2,604,515 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.09%) + 6,058,522 stalled-cycles-backend:u # 0.50% backend cycles idle (74.66%) + 1,701,293,584 instructions:u # 1.41 insn per cycle + # 0.00 stalled cycles per insn (75.82%) + 0.474343055 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +69,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405200E-006 -Relative difference = 3.3369094561706885e-07 +Avg ME (F77/GPU) = 8.1274562879405183E-006 +Relative difference = 3.336909458255062e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.501790e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.505136e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.505136e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.152240 sec -INFO: No Floating Point Exceptions have been reported - 470,061,720 cycles # 3.025 GHz - 1,393,763,209 instructions # 2.97 insn per cycle - 0.155889798 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.126453e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.132278e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.132278e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.104730 sec +INFO: No Floating Point Exceptions have been reported + 362,909,682 cycles:u # 3.378 GHz (70.55%) + 34,800 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.02%) + 44,810,281 stalled-cycles-backend:u # 12.35% backend cycles idle (75.74%) + 1,342,031,847 instructions:u # 3.70 insn per cycle + # 0.03 stalled cycles per insn (77.69%) + 0.111618296 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1603) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +104,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.954658e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.968212e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.968212e+03 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.077211 sec -INFO: No Floating Point Exceptions have been reported - 235,223,590 cycles # 2.925 GHz - 684,213,441 instructions # 2.91 insn per cycle - 0.080969906 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.005583e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.007752e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007752e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.053854 sec +INFO: No Floating Point Exceptions have been reported + 181,501,491 cycles:u # 3.216 GHz (71.73%) + 26,353 stalled-cycles-frontend:u # 0.01% frontend cycles idle (71.73%) + 22,628,933 stalled-cycles-backend:u # 12.47% backend cycles idle (71.73%) + 673,671,420 instructions:u # 3.71 insn per cycle + # 0.03 stalled cycles per insn (72.78%) + 0.060579522 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 8787) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +139,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.468005e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.473933e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.473933e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037269 sec -INFO: No Floating Point Exceptions have been reported - 111,406,073 cycles # 2.752 GHz - 248,660,524 instructions # 2.23 insn per cycle - 0.041010123 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.127532e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.137036e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.137036e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 +TOTAL : 0.026209 sec +INFO: No Floating Point Exceptions have been reported + 96,195,626 cycles:u # 3.334 GHz (81.02%) + 13,254 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.24%) + 11,053,673 stalled-cycles-backend:u # 11.49% backend cycles idle (72.43%) + 226,834,304 instructions:u # 2.36 insn per cycle + # 0.05 stalled cycles per insn (72.42%) + 0.033077061 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7874) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,76 +174,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.687371e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.694987e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.694987e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.032517 sec -INFO: No Floating Point Exceptions have been reported - 99,075,407 cycles # 2.779 GHz - 229,256,995 instructions # 2.31 insn per cycle - 0.036194322 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.125360e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.130339e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.130339e+04 ) sec^-1 -MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 -TOTAL : 0.048501 sec -INFO: No Floating Point Exceptions have been reported - 88,927,475 cycles # 1.713 GHz - 128,580,821 instructions # 1.45 insn per cycle - 0.052459192 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6355) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127459e-06 -Avg ME (F77/C++) = 8.1274563450143301E-006 -Relative difference = 3.266686019634872e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index bad45a7dc8..4f73e04d01 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:05 -DATE: 2024-10-02_23:22:43 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.879555e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.325400e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.788674e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.519459 sec -INFO: No Floating Point Exceptions have been reported - 2,192,488,330 cycles # 2.904 GHz - 3,108,589,457 instructions # 1.42 insn per cycle - 0.811901500 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = 
( 5.205763e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.282357e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.339950e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.363554 sec +INFO: No Floating Point Exceptions have been reported + 824,654,219 cycles:u # 2.250 GHz (74.45%) + 2,378,121 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.16%) + 5,258,443 stalled-cycles-backend:u # 0.64% backend cycles idle (76.09%) + 1,405,112,573 instructions:u # 1.70 insn per cycle + # 0.00 stalled cycles per insn (75.85%) + 0.423315562 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.365007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.070287e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.070287e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.231113 sec -INFO: No Floating Point Exceptions have been reported - 3,770,884,627 cycles # 3.051 GHz - 9,730,787,613 instructions # 2.58 insn per cycle - 1.236813254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.170090e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.317717e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.317717e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.084559 sec +INFO: No Floating Point Exceptions have been reported + 3,490,848,997 cycles:u # 3.181 GHz (74.75%) + 7,660,767 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.48%) + 9,573,661 stalled-cycles-backend:u # 0.27% backend cycles idle (74.84%) + 9,510,925,502 instructions:u # 2.72 insn per cycle + # 0.00 stalled cycles per insn (75.22%) + 1.101615381 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.578999e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033336e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033336e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.776953 sec -INFO: No Floating Point Exceptions have been reported - 2,334,361,876 cycles # 2.984 GHz - 5,933,594,772 instructions # 2.54 insn per cycle - 0.782905833 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.200255e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.821135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.821135e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.664149 sec +INFO: No Floating Point Exceptions have been reported + 2,027,787,140 cycles:u # 2.998 GHz (74.61%) + 8,263,760 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.11%) + 12,856,869 stalled-cycles-backend:u # 0.63% backend cycles idle (75.17%) + 5,831,439,407 instructions:u # 2.88 insn per cycle + # 0.00 stalled cycles per insn (75.17%) + 0.680595379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1321) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.298604e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.378530e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.378530e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.568056 sec -INFO: No Floating Point Exceptions have been reported - 1,681,243,313 cycles # 2.932 GHz - 3,315,595,889 instructions # 1.97 insn per cycle - 0.574037989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.423841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.000143e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.000143e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.494024 sec +INFO: No Floating Point Exceptions have been reported + 1,417,663,203 cycles:u # 2.800 GHz (74.74%) + 8,507,099 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.86%) + 18,358,587 stalled-cycles-backend:u # 1.29% backend cycles idle (74.86%) + 3,268,344,350 instructions:u # 2.31 insn per cycle + # 0.01 stalled cycles per insn (74.73%) + 0.510616829 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.355034e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.488075e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.488075e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 
3.293564e-05 ) GeV^0 -TOTAL : 0.558433 sec -INFO: No Floating Point Exceptions have been reported - 1,640,005,974 cycles # 2.909 GHz - 3,285,268,931 instructions # 2.00 insn per cycle - 0.564410411 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.255707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.292044e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.292044e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.576788 sec -INFO: No Floating Point Exceptions have been reported - 1,373,892,799 cycles # 2.360 GHz - 2,425,202,745 instructions # 1.77 insn per cycle - 0.582721873 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index 8744af06d4..a70a2e7d3c 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:11 -DATE: 2024-10-02_23:22:55 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.982500e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.466123e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.977983e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.518522 sec -INFO: No Floating Point Exceptions have been reported - 2,233,076,106 cycles # 2.958 GHz - 3,164,749,953 instructions # 1.42 insn per cycle - 0.811884376 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.339198e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.280910e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.338288e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.365386 sec +INFO: No Floating Point Exceptions have been reported + 841,771,538 cycles:u # 2.296 GHz (74.97%) + 2,469,523 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.99%) + 5,625,984 stalled-cycles-backend:u # 0.67% backend cycles idle (74.23%) + 1,399,458,696 instructions:u # 1.66 insn per cycle + # 0.00 stalled cycles per insn (74.61%) + 0.427605675 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.301306e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.064535e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.064535e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.238968 sec -INFO: No Floating Point Exceptions have been reported - 3,730,421,090 cycles # 2.998 GHz - 9,611,838,153 instructions # 2.58 insn per cycle - 1.245009902 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.177270e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.322334e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322334e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.078822 sec +INFO: No Floating Point Exceptions have been reported + 3,468,850,927 cycles:u # 3.179 GHz (74.79%) + 8,097,787 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.08%) + 13,115,680 stalled-cycles-backend:u # 0.38% backend cycles idle (75.08%) + 9,432,282,791 instructions:u # 2.72 insn per cycle + # 0.00 stalled cycles per insn (75.08%) + 1.095862672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 342) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.519835e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.952712e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.952712e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.801104 sec -INFO: No Floating Point Exceptions have been reported - 2,353,664,883 cycles # 2.919 GHz - 5,879,099,517 instructions # 2.50 insn per cycle - 0.807062172 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.211326e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.827359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.827359e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.661082 sec +INFO: No Floating Point Exceptions have been reported + 1,999,223,079 cycles:u # 2.967 GHz (74.92%) + 7,891,752 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.08%) + 16,453,867 stalled-cycles-backend:u # 0.82% backend cycles idle (75.08%) + 5,834,514,752 instructions:u # 2.92 insn per cycle + # 0.00 stalled cycles per insn (75.07%) + 0.678122223 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1295) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.306572e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401136e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401136e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.564979 sec -INFO: No Floating Point Exceptions have been reported - 1,668,493,167 cycles # 2.925 GHz - 3,288,096,894 instructions # 1.97 insn per cycle - 0.571004997 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.413437e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.985270e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.985270e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.493893 sec +INFO: No Floating Point Exceptions have been reported + 1,414,426,875 cycles:u # 2.795 GHz (75.14%) + 8,214,642 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.74%) + 16,376,840 stalled-cycles-backend:u # 1.16% backend cycles idle (74.72%) + 3,277,774,628 instructions:u # 2.32 insn per cycle + # 0.00 stalled cycles per insn (74.85%) + 0.510737818 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1418) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.353584e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.490021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.490021e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 
3.293564e-05 ) GeV^0 -TOTAL : 0.556005 sec -INFO: No Floating Point Exceptions have been reported - 1,637,480,739 cycles # 2.917 GHz - 3,262,503,753 instructions # 1.99 insn per cycle - 0.561947958 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.278727e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.296527e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.296527e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.572881 sec -INFO: No Floating Point Exceptions have been reported - 1,396,071,165 cycles # 2.414 GHz - 2,410,100,240 instructions # 1.73 insn per cycle - 0.578909062 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956172964268 -Relative difference = 2.59074336294025e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 319b533795..3f2ab68f19 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:17 -DATE: 2024-10-02_23:23:07 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.021736e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.095898e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.502720e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.484746 sec -INFO: No Floating Point Exceptions have been reported - 2,097,572,068 cycles # 2.947 GHz - 2,993,117,399 instructions # 1.43 insn per cycle - 0.769929348 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.509378e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972058e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.041321e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 +TOTAL : 0.325556 sec +INFO: No Floating Point Exceptions have been reported + 821,644,211 cycles:u # 2.440 GHz (74.04%) + 2,420,357 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.41%) + 12,629,346 stalled-cycles-backend:u # 1.54% backend cycles idle (74.81%) + 1,378,416,866 instructions:u # 1.68 insn per cycle + # 0.01 stalled cycles per insn (74.88%) + 0.379986913 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956735057756 -Relative difference = 4.559355911674916e-07 +Avg ME (F77/GPU) = 0.14771957969060168 +Relative difference = 5.394724574150425e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.485111e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.089179e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.089179e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.192909 sec -INFO: No Floating Point Exceptions have been reported - 3,665,476,463 cycles # 3.060 GHz - 9,601,549,579 instructions # 2.62 insn per cycle - 1.198508580 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.379058e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.578973e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.578973e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 +TOTAL : 0.908172 sec +INFO: No Floating Point Exceptions have been reported + 2,981,211,693 cycles:u # 3.254 GHz (74.72%) + 6,588,015 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.69%) + 4,890,728 stalled-cycles-backend:u # 0.16% backend cycles idle (74.76%) + 9,481,627,819 instructions:u # 3.18 insn per cycle + # 0.00 stalled cycles per insn (74.77%) + 0.920815479 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 432) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.258115e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.376765e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.376765e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.551301 sec -INFO: No Floating Point Exceptions have been reported - 1,637,946,426 cycles # 2.944 GHz - 3,967,582,411 instructions # 2.42 insn per cycle - 0.556978816 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.468028e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.171493e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.171493e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 +TOTAL : 0.449474 sec +INFO: No Floating Point Exceptions have been reported + 1,387,037,153 cycles:u # 3.032 GHz (74.51%) + 6,234,875 stalled-cycles-frontend:u # 0.45% frontend cycles idle (75.37%) + 19,626,012 stalled-cycles-backend:u # 1.41% backend cycles idle (75.52%) + 3,856,671,799 instructions:u # 2.78 insn per cycle + # 0.01 stalled cycles per insn (75.52%) + 0.461712887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1513) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 +Avg ME (F77/C++) = 0.14771955448668450 +Relative difference = 3.081061382869002e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.152306e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.639356e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.639356e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.420259 sec -INFO: No Floating Point Exceptions have been reported - 1,264,212,435 cycles # 2.972 GHz - 2,497,364,762 instructions # 1.98 insn per cycle - 0.425990331 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.654464e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.000589e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.000589e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 +TOTAL : 0.373131 sec +INFO: No Floating Point Exceptions have been reported + 1,093,114,711 cycles:u # 2.868 GHz (74.83%) + 5,539,006 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.83%) + 11,753,677 stalled-cycles-backend:u # 1.08% backend cycles idle (74.82%) + 2,419,335,654 instructions:u # 2.21 insn per cycle + # 0.00 stalled cycles per insn (74.98%) + 0.385911766 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 +Avg ME (F77/C++) = 0.14771955128526315 +Relative difference = 3.2977842382139064e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.176305e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.859286e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.859286e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.418213 sec -INFO: No Floating Point Exceptions have been reported - 1,244,133,116 cycles # 2.939 GHz - 2,473,380,671 instructions # 1.99 insn per cycle - 0.423994842 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1870) (512y: 1) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.060336e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.249952e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.249952e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.431162 sec -INFO: No Floating Point Exceptions have been reported - 1,082,620,148 cycles # 2.481 GHz - 2,073,283,815 instructions # 1.92 insn per cycle - 0.436955508 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 30254feeab..9145b856d6 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:22 -DATE: 2024-10-02_23:23:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.019401e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.048318e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.455629e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.481010 sec -INFO: No Floating Point Exceptions have been reported - 2,088,372,875 cycles # 2.945 GHz - 2,964,890,992 instructions # 1.42 insn per cycle - 0.766303026 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 7.522939e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.946792e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.014454e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 +TOTAL : 0.344754 sec +INFO: No Floating Point Exceptions have been reported + 799,483,065 cycles:u # 2.366 GHz (74.85%) + 2,309,346 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.34%) + 6,615,642 stalled-cycles-backend:u # 0.83% backend cycles idle (76.39%) + 1,387,436,934 instructions:u # 1.74 insn per cycle + # 0.00 stalled cycles per insn (73.91%) + 0.403460468 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771956525510177 -Relative difference = 4.4175008557828484e-07 +Avg ME (F77/GPU) = 0.14771957969060168 +Relative difference = 5.394724574150425e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.478146e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.094736e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.094736e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.193560 sec -INFO: No Floating Point Exceptions have been reported - 3,623,971,187 cycles # 3.024 GHz - 9,471,432,296 instructions # 2.61 insn per cycle - 1.199132805 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.383132e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.585694e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.585694e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 +TOTAL : 0.906227 sec +INFO: No Floating Point Exceptions have been reported + 2,961,341,832 cycles:u # 3.239 GHz (74.89%) + 6,546,623 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.65%) + 8,080,120 stalled-cycles-backend:u # 0.27% backend cycles idle (74.70%) + 9,414,623,269 instructions:u # 3.18 insn per cycle + # 0.00 stalled cycles per insn (74.70%) + 0.918418496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 337) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.293885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.464836e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.464836e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.543877 sec -INFO: No Floating Point Exceptions have been reported - 1,640,922,140 cycles # 2.988 GHz - 3,933,388,950 instructions # 2.40 insn per cycle - 0.549660540 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.501547e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.197208e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.197208e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 +TOTAL : 0.450826 sec +INFO: No Floating Point Exceptions have been reported + 1,381,505,679 cycles:u # 3.010 GHz (74.30%) + 6,213,946 stalled-cycles-frontend:u # 0.45% frontend cycles idle (75.15%) + 9,694,256 stalled-cycles-backend:u # 0.70% backend cycles idle (75.60%) + 3,820,776,901 instructions:u # 2.77 insn per cycle + # 0.00 stalled cycles per insn (75.60%) + 0.463548588 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1479) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955861942843 -Relative difference = 2.80129187869649e-07 +Avg ME (F77/C++) = 0.14771955448668450 +Relative difference = 3.081061382869002e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.096652e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.526616e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.526616e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.424433 sec -INFO: No Floating Point Exceptions have been reported - 1,265,916,102 cycles # 2.948 GHz - 2,482,033,677 instructions # 1.96 insn per cycle - 0.430083916 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1817) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.671406e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.045498e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.045498e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 +TOTAL : 0.371300 sec +INFO: No Floating Point Exceptions have been reported + 1,092,610,661 cycles:u # 2.881 GHz (74.81%) + 5,305,410 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.70%) + 33,831,557 stalled-cycles-backend:u # 3.10% backend cycles idle (74.69%) + 2,378,087,655 instructions:u # 2.18 insn per cycle + # 0.01 stalled cycles per insn (74.73%) + 0.383374559 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1802) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 +Avg ME (F77/C++) = 0.14771955128526315 +Relative difference = 3.2977842382139064e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.099768e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.536640e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 5.536640e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.425471 sec -INFO: No Floating Point Exceptions have been reported - 1,239,687,962 cycles # 2.879 GHz - 2,457,003,272 instructions # 1.98 insn per cycle - 0.431204562 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1773) (512y: 1) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955698961392 -Relative difference = 2.9116235141448046e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 
256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.076752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.347849e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.347849e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.427038 sec -INFO: No Floating Point Exceptions have been reported - 1,082,096,190 cycles # 2.503 GHz - 2,057,508,420 instructions # 1.90 insn per cycle - 0.432876705 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955262403935 -Relative difference = 3.207154680524219e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index c992dd1560..620a232d6e 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:28 -DATE: 2024-10-02_23:23:30 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.870947e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.292610e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.748112e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.517185 sec -INFO: No Floating Point Exceptions have been reported - 2,235,637,342 cycles # 2.968 GHz - 3,165,178,455 instructions # 1.42 insn per cycle - 0.810025271 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.287948e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.300323e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.359514e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.350690 sec +INFO: No Floating Point Exceptions have been reported + 790,684,072 cycles:u # 2.155 GHz (76.09%) + 2,255,006 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.21%) + 8,212,728 stalled-cycles-backend:u # 1.04% backend cycles idle (74.05%) + 1,515,061,170 instructions:u # 1.92 insn per cycle + # 0.01 stalled cycles per insn (73.59%) + 0.411230761 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.276369e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.059318e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.059318e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.243225 sec -INFO: No Floating Point Exceptions have been reported - 3,811,509,127 cycles # 3.053 GHz - 9,755,893,754 instructions # 2.56 insn per cycle - 1.249011242 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.073332e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193079e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193079e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.165645 sec +INFO: No Floating Point Exceptions have been reported + 3,760,508,128 cycles:u # 3.181 GHz (74.97%) + 9,799,535 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.04%) + 82,319,711 stalled-cycles-backend:u # 2.19% backend cycles idle (75.04%) + 9,617,990,540 instructions:u # 2.56 insn per cycle + # 0.01 stalled cycles per insn (74.98%) + 1.186960560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.575213e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.033630e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.033630e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.777751 sec -INFO: No Floating Point Exceptions have been reported - 2,324,158,098 cycles # 2.968 GHz - 5,921,190,869 instructions # 2.55 insn per cycle - 0.783772418 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.257401e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.939163e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.939163e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.651275 sec +INFO: No Floating Point Exceptions have been reported + 1,960,210,789 cycles:u # 2.953 GHz (74.19%) + 7,743,342 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.19%) + 9,288,328 stalled-cycles-backend:u # 0.47% backend cycles idle (74.70%) + 5,855,328,189 instructions:u # 2.99 insn per cycle + # 0.00 stalled cycles per insn (75.19%) + 0.668603584 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1383) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.318378e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.429052e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.429052e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.565758 sec -INFO: No Floating Point Exceptions have been reported - 1,652,981,708 cycles # 2.895 GHz - 3,254,347,551 instructions # 1.97 insn per cycle - 0.571727030 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.497892e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.159332e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.159332e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.487677 sec +INFO: No Floating Point Exceptions have been reported + 1,407,046,199 cycles:u # 2.814 GHz (74.51%) + 8,503,247 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.55%) + 15,903,763 stalled-cycles-backend:u # 1.13% backend cycles idle (74.44%) + 3,151,478,847 instructions:u # 2.24 insn per cycle + # 0.01 stalled cycles per insn (75.20%) + 0.504440981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1546) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 +Avg ME (F77/C++) = 0.14771956675526976 +Relative difference = 2.2505293980258705e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.435162e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.624330e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.624330e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.542890 sec -INFO: No Floating Point Exceptions have been reported - 1,608,327,569 cycles # 2.934 GHz - 3,210,329,014 instructions # 2.00 insn per cycle - 0.548955457 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 
2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.277841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.302624e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.302624e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.573796 sec -INFO: No Floating Point Exceptions have been reported - 1,366,629,222 cycles # 2.360 GHz - 2,377,238,088 instructions # 1.74 insn per cycle - 0.579856899 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 1ec6ca11ae..95e26b8533 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) 
+Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +DATE: 2024-10-04_11:51:33 -DATE: 2024-10-02_23:23:42 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.955347e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.449634e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.971675e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.519560 sec -INFO: No Floating Point Exceptions have been reported - 2,229,656,114 cycles # 2.956 GHz - 3,136,915,829 instructions # 1.41 insn per cycle - 0.813453217 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 -==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 5.319682e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.271681e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.328454e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 +TOTAL : 0.348858 sec +INFO: No Floating Point Exceptions have been reported + 836,700,042 cycles:u # 2.292 GHz (75.28%) + 2,521,349 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.13%) + 6,955,479 stalled-cycles-backend:u # 0.83% backend cycles idle (74.89%) + 1,467,285,506 instructions:u # 1.75 insn per cycle + # 0.00 stalled cycles per insn (73.54%) + 0.411037215 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.306555e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.063008e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.063008e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.238276 sec -INFO: No Floating Point Exceptions have been reported - 3,773,723,631 cycles # 3.035 GHz - 9,644,120,028 instructions # 2.56 insn per cycle - 1.244186863 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.168828e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.312276e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.312276e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 1.084440 sec +INFO: No Floating Point Exceptions have been reported + 3,504,205,366 cycles:u # 3.194 GHz (74.49%) + 7,959,362 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.85%) + 13,470,086 stalled-cycles-backend:u # 0.38% backend cycles idle (75.22%) + 9,469,384,445 instructions:u # 2.70 insn per cycle + # 0.00 stalled cycles per insn (75.21%) + 1.101421195 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 343) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.549168e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.991454e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.991454e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.789049 sec -INFO: No Floating Point Exceptions have been reported - 2,313,346,456 cycles # 2.912 GHz - 5,848,887,121 instructions # 2.53 insn per cycle - 0.794970078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.280836e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.941406e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.941406e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.646416 sec +INFO: No Floating Point Exceptions have been reported + 1,944,085,411 cycles:u # 2.952 GHz (74.64%) + 7,522,720 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.60%) + 16,790,089 stalled-cycles-backend:u # 0.86% backend cycles idle (74.60%) + 5,886,824,204 instructions:u # 3.03 insn per cycle + # 0.00 stalled cycles per insn (74.56%) + 0.663422839 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1353) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.347614e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.473937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.473937e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.557834 sec -INFO: No Floating Point Exceptions have been reported - 1,655,348,908 cycles # 2.940 GHz - 3,217,952,635 instructions # 1.94 insn per cycle - 0.563871078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.510803e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.168157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.168157e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 +TOTAL : 0.485381 sec +INFO: No Floating Point Exceptions have been reported + 1,393,931,403 cycles:u # 2.799 GHz (74.55%) + 8,424,009 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.44%) + 25,161,499 stalled-cycles-backend:u # 1.81% backend cycles idle (74.32%) + 3,171,112,938 instructions:u # 2.27 insn per cycle + # 0.01 stalled cycles per insn (74.68%) + 0.502324189 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1487) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 +Avg ME (F77/C++) = 0.14771956675526976 +Relative difference = 2.2505293980258705e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.424845e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.621915e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 3.621915e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.543698 sec -INFO: No Floating Point Exceptions have been reported - 1,602,341,227 cycles # 2.919 GHz - 3,182,199,907 instructions # 1.99 insn per cycle - 0.549609066 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 
2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.297489e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.339579e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339579e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.569767 sec -INFO: No Floating Point Exceptions have been reported - 1,382,180,389 cycles # 2.403 GHz - 2,361,725,571 instructions # 1.71 insn per cycle - 0.575784231 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1056) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956674392650 -Relative difference = 2.2512972893324335e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 370e514c12..b8e944a251 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:49:56 -DATE: 2024-10-02_23:20:23 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.230162e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.323594e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002154e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.536130 sec -INFO: No Floating Point Exceptions have been reported - 2,256,394,755 cycles # 2.938 GHz - 3,245,914,401 instructions # 1.44 insn per cycle - 0.828827482 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.825932e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.303839e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.322040e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.410068 sec +INFO: No Floating Point Exceptions have been reported + 997,958,849 cycles:u # 2.373 GHz (75.23%) + 2,279,294 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) + 6,782,942 stalled-cycles-backend:u # 0.68% backend cycles idle (75.46%) + 1,625,055,048 instructions:u # 1.63 insn per cycle + # 0.00 stalled cycles per insn (74.44%) + 0.467403205 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 +Avg ME (F77/GPU) = 2.0158358666195553 +Relative difference = 6.616631755314852e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.895732e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.944199e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.944199e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.637832 sec -INFO: No Floating Point Exceptions have been reported - 17,273,065,240 cycles # 3.061 GHz - 45,923,472,217 instructions # 2.66 insn per cycle - 5.643410439 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.544927e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.606801e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.606801e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.307024 sec +INFO: No Floating Point Exceptions have been reported + 14,759,851,104 cycles:u # 3.418 GHz (75.00%) + 10,213,686 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.99%) + 2,963,621,521 stalled-cycles-backend:u # 20.08% backend cycles idle (74.99%) + 45,578,208,957 instructions:u # 3.09 insn per cycle + # 0.07 stalled cycles per insn (75.00%) + 4.323134979 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194407 -Relative difference = 6.616637439061751e-08 +Avg ME (F77/C++) = 2.0158358666194411 +Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.297798e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.461035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.461035e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.287380 sec -INFO: No Floating Point Exceptions have been reported - 10,057,055,600 cycles # 3.055 GHz - 27,804,384,494 instructions # 2.76 insn per cycle - 3.293195334 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.343987e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.531990e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.531990e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.593460 sec +INFO: No Floating Point Exceptions have been reported + 8,804,265,677 cycles:u # 3.380 GHz (74.95%) + 8,608,560 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.10%) + 2,711,216,699 stalled-cycles-backend:u # 30.79% backend cycles idle (75.13%) + 27,713,688,883 instructions:u # 3.15 insn per cycle + # 0.10 stalled cycles per insn (75.13%) + 2.609709313 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.984936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.354618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.354618e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.209636 sec -INFO: No Floating Point Exceptions have been reported - 6,102,986,954 cycles # 2.763 GHz - 12,589,726,132 instructions # 2.06 insn per cycle - 2.215628249 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.337973e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.867218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.867218e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.608838 sec +INFO: No Floating Point Exceptions have been reported + 5,332,671,248 cycles:u # 3.290 GHz (74.86%) + 8,580,627 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.87%) + 548,978,982 stalled-cycles-backend:u # 10.29% backend cycles idle (74.86%) + 12,401,583,918 instructions:u # 2.33 insn per cycle + # 0.04 stalled cycles per insn (74.83%) + 1.625027653 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2492) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.327606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.775533e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.775533e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
2.077880 sec -INFO: No Floating Point Exceptions have been reported - 5,579,947,178 cycles # 2.679 GHz - 12,003,081,651 instructions # 2.15 insn per cycle - 2.084004672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2365) (512y: 144) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.667640e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.860946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.860946e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.965237 sec -INFO: No Floating Point Exceptions have been reported - 5,764,359,655 cycles # 1.943 GHz - 8,342,529,257 instructions # 1.45 insn per cycle - 2.971031508 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 4a0767e5de..8097702dbb 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:50:09 -DATE: 2024-10-02_23:20:47 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.355605e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.277087e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.956218e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.530876 sec -INFO: No Floating Point Exceptions have been reported - 2,249,324,155 cycles # 2.931 GHz - 3,226,562,604 instructions # 1.43 insn per cycle - 0.824282948 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.860299e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.359622e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.378594e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.400310 sec +INFO: No Floating Point Exceptions have been reported + 977,387,830 cycles:u # 2.341 GHz (74.17%) + 2,517,211 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.55%) + 6,318,079 stalled-cycles-backend:u # 0.65% backend cycles idle (75.23%) + 1,636,223,246 instructions:u # 1.67 insn per cycle + # 0.00 stalled cycles per insn (75.17%) + 0.463223853 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195562 -Relative difference = 6.616631711254798e-08 +Avg ME (F77/GPU) = 2.0158358666195553 +Relative difference = 6.616631755314852e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.940475e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.991632e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.991632e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.508327 sec -INFO: No Floating Point Exceptions have been reported - 16,765,096,335 cycles # 3.041 GHz - 44,907,213,075 instructions # 2.68 insn per cycle - 5.514387413 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.672609e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.739968e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739968e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.102592 sec +INFO: No Floating Point Exceptions have been reported + 14,079,409,845 cycles:u # 3.422 GHz (74.98%) + 8,049,289 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 2,310,838,592 stalled-cycles-backend:u # 16.41% backend cycles idle (74.93%) + 44,472,298,847 instructions:u # 3.16 insn per cycle + # 0.05 stalled cycles per insn (74.92%) + 4.118929817 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.469638e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.652475e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.652475e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.131046 sec -INFO: No Floating Point Exceptions have been reported - 9,519,736,258 cycles # 3.036 GHz - 26,678,539,115 instructions # 2.80 insn per cycle - 3.137009684 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.610444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829618e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.453155 sec +INFO: No Floating Point Exceptions have been reported + 8,297,206,118 cycles:u # 3.367 GHz (75.01%) + 9,142,863 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.01%) + 1,462,081,092 stalled-cycles-backend:u # 17.62% backend cycles idle (75.01%) + 26,753,959,008 instructions:u # 3.22 insn per cycle + # 0.05 stalled cycles per insn (75.03%) + 2.468792614 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2278) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.671787e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.002601e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.002601e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.352280 sec -INFO: No Floating Point Exceptions have been reported - 6,629,963,277 cycles # 2.812 GHz - 14,109,636,377 instructions # 2.13 insn per cycle - 2.358209355 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2705) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.582782e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.005097e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005097e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.770963 sec +INFO: No Floating Point Exceptions have been reported + 5,924,884,751 cycles:u # 3.324 GHz (74.79%) + 10,170,880 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.89%) + 1,235,930,126 stalled-cycles-backend:u # 20.86% backend cycles idle (74.90%) + 14,218,104,856 instructions:u # 2.40 insn per cycle + # 0.09 stalled cycles per insn (74.88%) + 1.786720730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2700) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.754606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.104698e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.104698e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
2.312900 sec -INFO: No Floating Point Exceptions have been reported - 6,361,189,972 cycles # 2.744 GHz - 13,713,824,218 instructions # 2.16 insn per cycle - 2.319011188 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 298) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = DOUBLE (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.432030e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.604686e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.604686e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.161395 sec -INFO: No Floating Point Exceptions have been reported - 5,974,388,712 cycles # 1.887 GHz - 10,105,486,265 instructions # 1.69 insn per cycle - 3.167180711 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194953 -Relative difference = 6.616634729368461e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index 171c4f07f1..de9a53846a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:50:21 -DATE: 2024-10-02_23:21:12 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.343508e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.749333e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.880185e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.485800 sec -INFO: No Floating Point Exceptions have been reported - 2,094,905,997 cycles # 2.937 GHz - 3,016,360,566 instructions # 1.44 insn per cycle - 0.770368991 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.029624e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.168974e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.192805e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 +TOTAL : 0.334128 sec +INFO: No Floating Point Exceptions have been reported + 847,605,952 cycles:u # 2.446 GHz (74.31%) + 2,404,984 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.19%) + 6,809,593 stalled-cycles-backend:u # 0.80% backend cycles idle (76.16%) + 1,440,149,727 instructions:u # 1.70 insn per cycle + # 0.00 stalled cycles per insn (74.02%) + 0.392018881 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.015844e+00 +Avg ME (F77/GPU) = 2.0158466693246737 +Relative difference = 1.3241722443517625e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.003751e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.061477e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.061477e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.317047 sec -INFO: No Floating Point Exceptions have been reported - 16,226,729,405 cycles # 3.049 GHz - 45,319,748,869 instructions # 2.79 insn per cycle - 5.322657984 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.916433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.996682e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.996682e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 3.739424 sec +INFO: No Floating Point Exceptions have been reported + 12,919,555,123 cycles:u # 3.448 GHz (74.96%) + 7,161,924 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) + 2,653,034,768 stalled-cycles-backend:u # 20.54% backend cycles idle (75.02%) + 45,463,370,048 instructions:u # 3.52 insn per cycle + # 0.06 stalled cycles per insn (75.02%) + 3.751360428 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 667) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 +Avg ME (F77/C++) = 2.0158491450129077 +Relative difference = 7.193639399772436e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.661368e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.006222e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.006222e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.333881 sec -INFO: No Floating Point Exceptions have been reported - 7,065,193,815 cycles # 3.021 GHz - 17,792,282,713 instructions # 2.52 insn per cycle - 2.339489027 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.114956e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.484341e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.484341e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 1.859533 sec +INFO: No Floating Point Exceptions have been reported + 6,325,374,109 cycles:u # 3.388 GHz (75.03%) + 6,631,466 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.15%) + 2,746,926,513 stalled-cycles-backend:u # 43.43% backend cycles idle (75.15%) + 17,097,211,499 instructions:u # 2.70 insn per cycle + # 0.16 stalled cycles per insn (75.15%) + 1.871211674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2902) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 +Avg ME (F77/C++) = 2.0158492142800242 +Relative difference = 1.0629765641719438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.680930e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.902131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.902131e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.293085 sec -INFO: No Floating Point Exceptions have been reported - 3,745,244,491 cycles # 2.886 GHz - 8,263,077,102 instructions # 2.21 insn per cycle - 1.298740126 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3371) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.194705e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.337360e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.337360e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 +TOTAL : 1.021215 sec +INFO: No Floating Point Exceptions have been reported + 3,362,486,824 cycles:u # 3.269 GHz (75.11%) + 6,869,200 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.11%) + 830,943,627 stalled-cycles-backend:u # 24.71% backend cycles idle (75.11%) + 8,093,573,532 instructions:u # 2.41 insn per cycle + # 0.10 stalled cycles per insn (75.11%) + 1.033064527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3258) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158479403471574 +Relative difference = 2.9591934841076347e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.127600e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.045053e+06 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.045053e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.236021 sec -INFO: No Floating Point Exceptions have been reported - 3,554,738,616 cycles # 2.865 GHz - 7,914,272,775 instructions # 2.23 insn per cycle - 1.241584729 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3214) (512y: 20) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.816839e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.519320e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.519320e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.622579 sec -INFO: No Floating Point Exceptions have been reported - 3,259,303,388 cycles # 2.003 GHz - 6,101,587,749 instructions # 1.87 insn per cycle - 1.628190659 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2258) (512y: 22) (512z: 2156) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 5827327dd2..cbd2b02691 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:50:31 -DATE: 2024-10-02_23:21:32 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.278999e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.762585e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.886988e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.489792 sec -INFO: No Floating Point Exceptions have been reported - 2,055,512,500 cycles # 2.867 GHz - 2,939,151,591 instructions # 1.43 insn per cycle - 0.774255420 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 6.141035e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.214548e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240427e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 +TOTAL : 0.338852 sec +INFO: No Floating Point Exceptions have been reported + 846,172,164 cycles:u # 2.420 GHz (75.55%) + 2,384,656 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) + 8,880,088 stalled-cycles-backend:u # 1.05% backend cycles idle (75.39%) + 1,440,550,238 instructions:u # 1.70 insn per cycle + # 0.01 stalled cycles per insn (77.34%) + 0.396188916 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 -Avg ME (C++/GPU) = 2.015841e+00 -Avg ME (F77/GPU) = 2.0158787037944421 -Relative difference = 1.870375413642407e-05 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +Avg ME (C++/GPU) = 2.015844e+00 +Avg ME (F77/GPU) = 2.0158466693246737 +Relative difference = 1.3241722443517625e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.955650e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.011909e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.011909e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 5.448356 sec -INFO: No Floating Point Exceptions have been reported - 15,943,191,357 cycles # 2.924 GHz - 44,424,518,586 instructions # 2.79 insn per cycle - 5.454103934 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.042121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.129542e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.129542e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 3.591543 sec +INFO: No Floating Point Exceptions have been reported + 12,399,003,358 cycles:u # 3.445 GHz (74.89%) + 7,265,600 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) + 1,294,290,338 stalled-cycles-backend:u # 10.44% backend cycles idle (74.94%) + 44,252,750,290 instructions:u # 3.57 insn per cycle + # 0.03 stalled cycles per insn (75.03%) + 3.603349091 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 571) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491701586172 -Relative difference = 8.441039850630506e-08 +Avg ME (F77/C++) = 2.0158491450129077 +Relative difference = 7.193639399772436e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.276402e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.747216e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.747216e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.074473 sec -INFO: No Floating Point Exceptions have been reported - 6,074,931,142 cycles # 2.922 GHz - 17,078,265,912 instructions # 2.81 insn per cycle - 2.080193584 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.535434e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.112084e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.112084e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 +TOTAL : 1.535162 sec +INFO: No Floating Point Exceptions have been reported + 5,185,887,199 cycles:u # 3.362 GHz (74.95%) + 6,649,344 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.11%) + 1,503,459,114 stalled-cycles-backend:u # 28.99% backend cycles idle (75.11%) + 16,935,312,911 instructions:u # 3.27 insn per cycle + # 0.09 stalled cycles per insn (75.11%) + 1.547341857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2752) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158486895961687 -Relative difference = 1.539816876576819e-07 +Avg ME (F77/C++) = 2.0158492142800242 +Relative difference = 1.0629765641719438e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.007855e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.581033e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.581033e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.830417 sec -INFO: No Floating Point Exceptions have been reported - 5,038,064,439 cycles # 2.745 GHz - 10,225,598,218 instructions # 2.03 insn per cycle - 1.836161273 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.902619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.674327e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.674327e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 +TOTAL : 1.321463 sec +INFO: No Floating Point Exceptions have been reported + 4,431,538,272 cycles:u # 3.335 GHz (74.77%) + 7,094,416 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.77%) + 1,688,909,827 stalled-cycles-backend:u # 38.11% backend cycles idle (74.77%) + 10,255,351,383 instructions:u # 2.31 insn per cycle + # 0.16 stalled cycles per insn (74.94%) + 1.333077945 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158479403471574 +Relative difference = 2.9591934841076347e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.986593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.564461e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 6.564461e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.838696 sec -INFO: No Floating Point Exceptions have been reported - 4,986,170,011 cycles # 2.706 GHz - 9,996,697,446 instructions # 2.00 insn per cycle - 1.844536408 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015847e+00 -Avg ME (F77/C++) = 2.0158474864438176 -Relative difference = 2.4130988992271984e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = FLOAT (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.589226e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.912431e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.912431e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.372568 sec -INFO: No Floating Point Exceptions have been reported - 4,377,668,270 cycles # 1.841 GHz - 8,445,524,154 instructions # 1.93 insn per cycle - 2.378514848 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158476348733529 -Relative difference = 1.8112806478434436e-07 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 4c61e46c6d..ad357326a9 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:50:41 -DATE: 2024-10-02_23:21:54 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.251838e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.183380e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.939643e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.535615 sec -INFO: No Floating Point Exceptions have been reported - 2,198,949,202 cycles # 2.843 GHz - 3,150,067,963 instructions # 1.43 insn per cycle - 0.831211671 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.844695e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.314566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.332811e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.399769 sec +INFO: No Floating Point Exceptions have been reported + 1,000,344,858 cycles:u # 2.398 GHz (75.57%) + 2,483,436 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.29%) + 5,811,310 stalled-cycles-backend:u # 0.58% backend cycles idle (74.17%) + 1,551,135,214 instructions:u # 1.55 insn per cycle + # 0.00 stalled cycles per insn (76.46%) + 0.463433890 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.793078e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.838862e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.838862e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.957870 sec -INFO: No Floating Point Exceptions have been reported - 17,383,086,317 cycles # 2.915 GHz - 46,074,988,832 instructions # 2.65 insn per cycle - 5.963882040 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.571404e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.633916e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.633916e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.258083 sec +INFO: No Floating Point Exceptions have been reported + 14,636,986,021 cycles:u # 3.428 GHz (74.93%) + 9,201,381 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) + 2,834,488,725 stalled-cycles-backend:u # 19.37% backend cycles idle (74.93%) + 45,648,548,771 instructions:u # 3.12 insn per cycle + # 0.06 stalled cycles per insn (75.03%) + 4.274649438 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.194287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.355552e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.355552e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.392799 sec -INFO: No Floating Point Exceptions have been reported - 9,911,878,237 cycles # 2.918 GHz - 27,589,860,886 instructions # 2.78 insn per cycle - 3.398866655 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.331049e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.526012e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.526012e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.601226 sec +INFO: No Floating Point Exceptions have been reported + 8,833,488,747 cycles:u # 3.381 GHz (74.92%) + 9,265,639 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.92%) + 2,771,133,878 stalled-cycles-backend:u # 31.37% backend cycles idle (74.90%) + 27,586,457,635 instructions:u # 3.12 insn per cycle + # 0.10 stalled cycles per insn (75.03%) + 2.617092705 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.099557e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.502113e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.502113e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.164835 sec -INFO: No Floating Point Exceptions have been reported - 6,014,043,358 cycles # 2.771 GHz - 12,488,668,893 instructions # 2.08 insn per cycle - 2.170853663 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2776) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.500359e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.058510e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.058510e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.577290 sec +INFO: No Floating Point Exceptions have been reported + 5,237,176,956 cycles:u # 3.297 GHz (74.84%) + 9,167,020 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.86%) + 1,282,972,972 stalled-cycles-backend:u # 24.50% backend cycles idle (74.86%) + 12,276,243,394 instructions:u # 2.34 insn per cycle + # 0.10 stalled cycles per insn (74.83%) + 1.592961731 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2671) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 +Avg ME (F77/C++) = 2.0158359151896224 +Relative difference = 4.20720623263505e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.772169e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.266403e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.266403e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 1.920250 sec -INFO: No Floating Point Exceptions have been reported - 5,548,106,991 cycles # 2.882 GHz - 11,923,814,669 instructions # 2.15 insn per cycle - 1.926159830 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 146) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.789351e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.994932e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.994932e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.873374 sec -INFO: No Floating Point Exceptions have been reported - 5,656,356,995 cycles # 1.965 GHz - 8,113,165,976 instructions # 1.43 insn per cycle - 2.879222217 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1865) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 9c262ab65b..2c5c1083f9 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,68 +1,49 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cpp512y (was cppauto) +Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cppavx2 (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasCurand +HASCURAND=hasNoCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for 
tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make: Nothing to be done for 'all'. 
-make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +DATE: 2024-10-04_11:50:53 -DATE: 2024-10-02_23:22:19 - -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.276232e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.390219e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005905e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.527346 sec -INFO: No Floating Point Exceptions have been reported - 2,272,920,837 cycles # 2.964 GHz - 3,201,602,686 instructions # 1.41 insn per cycle - 0.824609816 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 
-==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 -==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +EvtsPerSec[Rmb+ME] (23) = ( 1.854334e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.340714e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.359394e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 +TOTAL : 0.395885 sec +INFO: No Floating Point Exceptions have been reported + 1,009,668,218 cycles:u # 2.443 GHz (74.93%) + 2,342,458 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.08%) + 7,222,790 stalled-cycles-backend:u # 0.72% backend cycles idle (74.93%) + 1,606,228,617 instructions:u # 1.59 insn per cycle + # 0.00 stalled cycles per insn (73.70%) + 0.455473650 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +51,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.916084e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.966623e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.966623e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 5.577101 sec -INFO: No Floating Point Exceptions have been reported - 16,950,562,354 cycles # 3.037 GHz - 45,091,377,881 instructions # 2.66 insn per cycle - 5.582979015 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.627354e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.693079e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.693079e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 4.170234 sec +INFO: No Floating Point Exceptions have been reported + 14,290,330,106 cycles:u # 3.417 GHz (74.98%) + 8,190,052 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) + 709,468,716 stalled-cycles-backend:u # 4.96% backend cycles idle (74.96%) + 44,665,806,699 instructions:u # 3.13 insn per cycle + # 0.02 stalled cycles per insn (74.96%) + 4.186752470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +86,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.424687e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.599685e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.599685e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.168080 sec -INFO: No Floating Point Exceptions have been reported - 9,533,110,078 cycles # 3.005 GHz - 26,250,804,820 instructions # 2.75 insn per cycle - 3.173990668 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.396197e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.594671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.594671e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 2.564153 sec +INFO: No Floating Point Exceptions have been reported + 8,706,948,556 cycles:u # 3.381 GHz (74.87%) + 11,021,237 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.84%) + 1,233,272,424 stalled-cycles-backend:u # 14.16% backend cycles idle (74.97%) + 26,375,882,323 instructions:u # 3.03 insn per cycle + # 0.05 stalled cycles per insn (75.12%) + 2.580435264 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2311) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +121,34 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.704288e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.029318e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.029318e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.333614 sec -INFO: No Floating Point Exceptions have been reported - 6,735,900,933 cycles # 2.880 GHz - 14,030,236,491 instructions # 2.08 insn per cycle - 2.339440984 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.489478e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.904989e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.904989e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 +TOTAL : 1.794954 sec +INFO: No Floating Point Exceptions have been reported + 6,019,924,912 cycles:u # 3.333 GHz (74.79%) + 9,633,706 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.82%) + 1,768,172,037 stalled-cycles-backend:u # 29.37% backend cycles idle (75.04%) + 13,981,192,969 instructions:u # 2.32 insn per cycle + # 0.13 stalled cycles per insn (75.20%) + 1.810890060 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2870) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,76 +156,16 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 +Avg ME (F77/C++) = 2.0158359151896224 +Relative difference = 4.20720623263505e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.936210e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.298362e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.298362e+05 ) 
sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.229547 sec -INFO: No Floating Point Exceptions have been reported - 6,391,727,814 cycles # 2.861 GHz - 13,514,455,678 instructions # 2.11 insn per cycle - 2.235403459 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. -DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= -INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -FP precision = MIXED (NaN/abnormal=0, zero=0) -Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.837043e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.047080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.047080e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.838742 sec -INFO: No Floating Point Exceptions have been reported - 5,600,700,385 cycles # 1.969 GHz - 9,206,380,773 instructions # 1.64 insn per cycle - 2.844839134 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) -------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe -INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -[ PASSED ] 4 tests. 
-DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } -INFO: No Floating Point Exceptions have been reported -DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } -INFO: No Floating Point Exceptions have been reported -------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359178371690 -Relative difference = 4.0758688308634e-08 -OK (relative difference <= 5E-3) +/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) ========================================================================= TEST COMPLETED From 07c2a535b2714fc44495fcfc8ecaa72e4f06038e Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 4 Oct 2024 16:15:28 +0300 Subject: [PATCH 10/11] [amd] rerun 30 tmad tests on LUMI worker node (small-g 72h) - no change (heft fails #833, skip ggttggg #933) ./tmad/allTees.sh -hip STARTED AT Fri 04 Oct 2024 11:53:26 AM EEST (SM tests) ENDED(1) AT Fri 04 Oct 2024 02:12:45 PM EEST [Status=0] (BSM tests) ENDED(1) AT Fri 04 Oct 2024 02:22:24 PM EEST [Status=0] 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt 12 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt 12 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt 1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 16 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt 16 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt eemumu MEK processed 81920 events across 2 channels { 1 : 81920 } eemumu MEK processed 8192 events across 2 channels { 1 : 8192 } ggttggg MEK processed 81920 events across 1240 channels { 1 : 81920 } ggttggg MEK processed 8192 events across 1240 channels { 1 : 8192 } ggttgg MEK processed 81920 events across 123 channels { 112 : 81920 } ggttgg MEK processed 8192 events across 123 channels { 112 : 8192 } ggttg MEK processed 81920 events across 16 channels { 1 : 81920 } ggttg MEK processed 8192 events across 16 channels { 1 : 8192 } ggtt MEK processed 81920 events across 3 channels { 1 : 81920 } ggtt MEK processed 8192 events across 3 channels { 1 : 8192 } gqttq MEK processed 81920 events across 5 channels { 1 : 81920 } gqttq MEK processed 8192 events across 5 channels { 1 : 8192 } heftggbb MEK processed 81920 events across 4 channels { 1 : 81920 } heftggbb MEK processed 8192 events across 4 channels { 1 : 8192 } smeftggtttt MEK processed 81920 events across 72 channels { 1 : 81920 } smeftggtttt MEK processed 8192 events across 72 channels { 1 : 8192 } susyggt1t1 MEK processed 
81920 events across 6 channels { 3 : 81920 } susyggt1t1 MEK processed 8192 events across 6 channels { 3 : 8192 } susyggtt MEK processed 81920 events across 3 channels { 1 : 81920 } susyggtt MEK processed 8192 events across 3 channels { 1 : 8192 } --- .../log_eemumu_mad_d_inl0_hrd0.txt | 444 +++++----------- .../log_eemumu_mad_f_inl0_hrd0.txt | 466 ++++++----------- .../log_eemumu_mad_m_inl0_hrd0.txt | 458 ++++++---------- .../log_ggtt_mad_d_inl0_hrd0.txt | 450 ++++++---------- .../log_ggtt_mad_f_inl0_hrd0.txt | 460 ++++++---------- .../log_ggtt_mad_m_inl0_hrd0.txt | 454 ++++++---------- .../log_ggttg_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttg_mad_f_inl0_hrd0.txt | 464 ++++++----------- .../log_ggttg_mad_m_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttgg_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_ggttgg_mad_f_inl0_hrd0.txt | 463 ++++++---------- .../log_ggttgg_mad_m_inl0_hrd0.txt | 464 ++++++----------- .../log_ggttggg_mad_d_inl0_hrd0.txt | 488 ++++------------- .../log_ggttggg_mad_f_inl0_hrd0.txt | 492 ++++-------------- .../log_ggttggg_mad_m_inl0_hrd0.txt | 488 ++++------------- .../log_gqttq_mad_d_inl0_hrd0.txt | 466 ++++++----------- .../log_gqttq_mad_f_inl0_hrd0.txt | 466 ++++++----------- .../log_gqttq_mad_m_inl0_hrd0.txt | 466 ++++++----------- .../log_heftggbb_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_heftggbb_mad_f_inl0_hrd0.txt | 100 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 474 ++++++----------- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 462 ++++++---------- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 464 ++++++----------- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 466 ++++++----------- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 448 ++++++---------- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 456 ++++++---------- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 458 ++++++---------- .../log_susyggtt_mad_d_inl0_hrd0.txt | 456 ++++++---------- .../log_susyggtt_mad_f_inl0_hrd0.txt | 462 ++++++---------- .../log_susyggtt_mad_m_inl0_hrd0.txt 
| 456 ++++++---------- 30 files changed, 4308 insertions(+), 9231 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 9b0b9f8c70..e5f1acd639 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-02_23:58:28 +DATE: 2024-10-04_11:57:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7338s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7265s - [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4787s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4734s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2177s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2099s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1354s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1301s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s - [COUNTERS] Fortran MEs ( 1 ) : 0.0732s for 81920 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2965s + [COUNTERS] Fortran MEs ( 1 ) : 0.0530s for 81920 events => throughput is 1.54E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7098s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6394s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 81920 events => throughput is 1.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3513s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 81920 events => throughput is 1.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155936e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.482917e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172560e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.528805e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2151s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1387s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1349s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,9 +239,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6961s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6516s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 81920 events => throughput is 1.86E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3310s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2967s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 81920 events => throughput is 2.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -263,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918531e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.513769e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.020683e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.535871e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,14 +294,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2148s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2112s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1370s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6734s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 81920 events => throughput is 2.56E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 81920 events => throughput is 3.43E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -343,96 +343,22 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.548719e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.709801e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.718686e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.792075e+06 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2119s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2083s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6378s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 81920 events => throughput is 2.60E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686657e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.772609e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,110 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2118s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.97E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4096s + [COUNTERS] 
Fortran Overhead ( 0 ) : 0.3963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789448173971E-002) differ by less than 3E-14 (0.0) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6475s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0380s for 81920 events => throughput is 2.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.112929e+06 ) sec^-1 +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.169699e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! 
Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6439s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6405s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 81920 events => throughput is 1.11E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.5811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5641s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 81920 events => throughput is 8.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103909519892E-002) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.312523e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.692916e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728376e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.782692e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.551104e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.860215e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.941874e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.103935e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534696e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.861582e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.933441e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.118406e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.510361e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.829015e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.195345e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.606029e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 05be9e9d6c..d284b6241b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-02_23:58:46 +DATE: 2024-10-04_11:57:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7422s - [COUNTERS] Fortran MEs ( 1 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4645s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2243s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2166s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1380s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1326s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7353s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6581s - [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 81920 events => throughput is 1.06E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3567s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s + [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 81920 events => throughput is 1.54E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432776035199060E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2248s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1374s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1327s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432776035199060E-002) differ by less than 4E-4 (1.4511057155885965e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711090687154856E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7074s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0661s for 81920 events => throughput is 1.24E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3430s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 81920 events => throughput is 1.80E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711090687154856E-002) differ by less than 4E-4 (1.4417409099909406e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.208440e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.920464e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.231118e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.927577e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432793908398633E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2102s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.1355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1334s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793908398633E-002) differ by less than 4E-4 (4.8253706141920816e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711108423277371E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6669s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 81920 events => throughput is 3.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3010s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108423277371E-002) differ by less than 4E-4 (4.921713170347175e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.117302e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.453098e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242056e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.598556e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432793820194981E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.24E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.1355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.53E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793820194981E-002) differ by less than 4E-4 (4.729945990433748e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711108407854763E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6658s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.32E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3146s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 81920 events => throughput is 4.66E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108407854763E-002) differ by less than 4E-4 (4.904896666602099e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.403974e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.601663e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2097s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 81920 events => throughput is 3.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.561752e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.628047e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728317e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2152s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6740s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6481s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 81920 events => throughput is 3.19E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.051156e+06 ) sec^-1 -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431784e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.540493e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432778459280288E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s - [COUNTERS] 
CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4133s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4014s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.71E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432778459280288E-002) differ by less than 4E-4 (1.1888523265835005e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711093172690286E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0769s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0691s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 81920 events => throughput is 1.13E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5604s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 81920 events => throughput is 1.09E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711093172690286E-002) differ by less than 4E-4 (1.1707229707891287e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.450419e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.835558e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.716246e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.780130e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.468932e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.126928e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.284727e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.638837e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.811258e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.019568e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.220962e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.427394e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.347565e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.528018e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.807469e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.846143e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index ceb72487c4..249ba624f2 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-02_23:59:05 +DATE: 2024-10-04_11:57:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7127s - [COUNTERS] Fortran MEs ( 1 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4894s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2067s - [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.1397s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1340s + [COUNTERS] Fortran MEs ( 1 ) : 0.0057s for 8192 events => throughput is 1.44E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7093s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6362s - [COUNTERS] Fortran MEs ( 1 ) : 0.0731s for 81920 events => throughput is 1.12E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2992s + [COUNTERS] Fortran MEs ( 1 ) : 0.0534s for 81920 events => throughput is 1.54E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2165s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.44E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.7136s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0712s for 81920 events => throughput is 1.15E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0543s for 81920 events => throughput is 1.51E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.143586e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.513314e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.169403e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.609489e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2119s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2075s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1385s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6805s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6396s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0407s for 81920 events => throughput is 2.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3296s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2962s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 81920 events => throughput is 2.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.010636e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.484196e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.105629e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.644939e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789444494401E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2123s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.1372s + [COUNTERS] Fortran Overhead ( 0 ) : 0.1346s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444494401E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103899063479E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 81920 events => throughput is 2.58E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 81920 events => throughput is 3.41E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063479E-002) differ by less than 2E-4 (1.1401468658078784e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.589631e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.649265e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6783s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6471s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 81920 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.654351e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.551066e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.743225e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.728629e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' -DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.2155s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2113s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6837s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6466s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 81920 events => throughput is 2.22E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.209789e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.334386e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789437826984E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.6444s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6410s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s - [COUNTERS] 
CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4332s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4198s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0082s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) +OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789437826984E-002) differ by less than 2E-4 (1.1194067894848558e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 1.0867s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 81920 events => throughput is 1.12E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6023s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5839s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) +OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.281389e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.703628e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.611764e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.701689e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.513316e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.862061e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.841595e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.148327e+07 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.527747e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.835643e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.926367e+08 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.122290e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.529012e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.837124e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.175131e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.604412e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index fcf8054bf9..4fdc427195 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-02_23:59:24 +DATE: 2024-10-04_11:57:44 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8251s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7830s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7026s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s + [COUNTERS] Fortran MEs ( 1 ) : 0.0283s for 8192 events => throughput is 2.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4396s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3999s - [COUNTERS] Fortran MEs ( 1 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3192s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s + [COUNTERS] Fortran MEs ( 1 ) : 0.0279s for 8192 events => throughput is 2.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9664s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5519s - [COUNTERS] Fortran MEs ( 1 ) : 0.4145s for 81920 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3222s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0423s + [COUNTERS] Fortran MEs ( 1 ) : 0.2799s for 81920 events => throughput is 2.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4442s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3245s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 8192 events => throughput is 2.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268150] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9642s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5311s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4327s for 81920 events => throughput is 1.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.3616s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3114s for 81920 events => throughput is 2.63E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268150) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.924342e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.686383e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.859061e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.686277e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4245s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3996s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3153s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268164] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7761s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5323s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2434s for 81920 events => throughput is 3.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2305s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1849s for 81920 events => throughput is 4.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268164) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.358630e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.283365e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.362585e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.559050e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.19E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3048s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.57E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6958s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 81920 events => throughput is 5.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1524s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 81920 events => throughput is 7.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.300976e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.338241e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.342527e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4131s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.689330e+05 ) sec^-1 -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6781s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5398s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1379s for 81920 events => throughput is 5.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.813432e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.843429e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,89 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4046s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 8192 events => throughput is 3.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7483s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2144s for 81920 events => throughput is 3.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.672595e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.764683e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -534,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8354s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.5974s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5835s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.138611968034162) and hip (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9945s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.47E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.3589s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 81920 events => throughput is 4.93E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (47.144596232268157) and hip (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.142986e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.531314e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.389230e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.422873e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.891641e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.710649e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA 
[nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.671813e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.082605e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.906867e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.711416e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.028190e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.861809e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883975e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.688339e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary 
= CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.704910e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.996118e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 793d082383..84ba16449e 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-02_23:59:52 +DATE: 2024-10-04_11:58:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7790s - [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5633s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5347s + [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4407s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3997s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3190s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s + [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9658s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5547s - [COUNTERS] Fortran MEs ( 1 ) : 0.4111s for 81920 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3180s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0375s + [COUNTERS] Fortran MEs ( 1 ) : 0.2805s for 81920 events => throughput is 2.92E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138605296829816] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4386s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0274s for 8192 events => throughput is 2.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138605296829816) differ by less than 4E-4 (1.4152313931869998e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144592003933589] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9768s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4106s for 81920 events => throughput is 2.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.4477s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2740s for 81920 events => throughput is 2.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144592003933589) differ by less than 4E-4 (8.968863673963767e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.003295e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.062937e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.019987e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.091905e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602746994408] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4185s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4008s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3060s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2928s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602746994408) differ by less than 4E-4 (1.956154279669775e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144589414828133] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7577s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1732s for 81920 events => throughput is 4.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1729s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0429s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1299s for 81920 events => throughput is 6.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07) +OK! xsec from fortran (47.144596232268157) and cpp (47.144589414828133) differ by less than 4E-4 (1.44607029572974e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.698016e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.446430e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.733377e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.475352e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602995819163] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4056s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3965s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.2985s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602995819163) differ by less than 4E-4 (1.9033685183522664e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144587555291501] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6291s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5406s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0883s for 81920 events => throughput is 9.28E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1141s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0641s for 81920 events => throughput is 1.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) +OK! xsec from fortran (47.144596232268157) and cpp (47.144587555291501) differ by less than 4E-4 (1.840502910077646e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.052077e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.346209e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4054s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6191s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5357s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 81920 events => throughput is 9.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.778412e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.283651e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.841904e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4104s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3984s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 7.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.351786e+06 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6518s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1122s for 81920 events => throughput is 7.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.954474e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.797285e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138605197694872] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8373s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.5747s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.5577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.85E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0125s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) +OK! xsec from fortran (47.138611968034162) and hip (47.138605197694872) differ by less than 4E-4 (1.4362619105146024e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144590142508306] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9846s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.04E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.3386s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.03E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) +OK! xsec from fortran (47.144596232268157) and hip (47.144590142508306) differ by less than 4E-4 (1.2917195901795964e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.218541e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.937998e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615186e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.882822e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.024967e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.641800e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.388814e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.950148e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 
--bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.001710e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.562820e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.373929e+08 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.033595e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.703628e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.191502e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.093326e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.262245e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index b1303dd832..37fbe019f1 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-03_00:00:19 +DATE: 2024-10-04_11:58:17 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.8412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7976s - [COUNTERS] Fortran MEs ( 1 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5755s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5472s + [COUNTERS] Fortran MEs ( 1 ) : 0.0282s for 8192 events => throughput is 2.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4419s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4011s - [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3219s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2940s + [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9652s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5537s - [COUNTERS] Fortran MEs ( 1 ) : 0.4115s for 81920 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3169s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0373s + [COUNTERS] Fortran MEs ( 1 ) : 0.2796s for 81920 events => throughput is 2.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3997s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3223s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 8192 events => throughput is 2.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9985s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5558s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4423s for 81920 events => throughput is 1.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.3872s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0719s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3151s for 81920 events => throughput is 2.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446513367086368e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.844334e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.676606e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.882466e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.665448e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4231s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3983s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3084s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2897s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 8192 events => throughput is 4.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597573367527] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7964s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5540s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2421s for 81920 events => throughput is 3.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2271s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0426s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1844s for 81920 events => throughput is 4.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367527) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.366359e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.461113e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.389089e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.482033e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613336664328] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3025s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2917s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613336664328) differ by less than 2E-4 (2.9034163517849265e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597613828985] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6933s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5438s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1491s for 81920 events => throughput is 5.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1475s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1053s for 81920 events => throughput is 7.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) +OK! xsec from fortran (47.144596232268157) and cpp (47.144597613828985) differ by less than 2E-4 (2.9304754622927476e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.398655e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.400566e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4102s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.6744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5361s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1380s for 81920 events => throughput is 5.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.941046e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.016996e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.965683e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 - [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.104743e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 - [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.7562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5468s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2090s for 81920 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.714345e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.833717e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611963547795] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.8403s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8366s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.75E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.5770s + 
[COUNTERS] Fortran Overhead ( 0 ) : 0.5629s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) +OK! xsec from fortran (47.138611968034162) and hip (47.138611963547795) differ by less than 2E-4 (9.517397980829401e-11) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232269080] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.9861s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 81920 events => throughput is 9.38E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.3704s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 81920 events => throughput is 4.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) +OK! xsec from fortran (47.144596232268157) and hip (47.144596232269080) differ by less than 2E-4 (1.9539925233402755e-14) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.100732e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.577917e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.378501e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.490514e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.877553e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.342087e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.586294e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.130707e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 
--bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.878727e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.749737e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.988107e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.928388e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.887451e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.739049e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.727351e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.133614e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 46adcb615c..2e40ef7bc3 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:00:48 +DATE: 2024-10-04_11:58:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7427s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4118s - [COUNTERS] Fortran MEs ( 1 ) : 0.3309s for 8192 events => throughput is 2.48E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.5953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3908s + [COUNTERS] Fortran MEs ( 1 ) : 0.2045s for 8192 events => throughput is 4.00E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6960s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s - [COUNTERS] Fortran MEs ( 1 ) : 0.3206s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4766s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2714s + [COUNTERS] Fortran MEs ( 1 ) : 0.2052s for 8192 events => throughput is 3.99E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.0380s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8383s - [COUNTERS] Fortran MEs ( 1 ) : 3.1997s for 81920 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.2989s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2518s + [COUNTERS] Fortran MEs ( 1 ) : 2.0471s for 81920 events => throughput is 4.00E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7120s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3342s for 8192 events => throughput is 2.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.5286s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2780s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2499s for 8192 events => throughput is 3.28E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.2236s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8645s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.3580s for 81920 events => throughput is 2.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 3.7677s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2400s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5271s for 81920 events => throughput is 3.24E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.533053e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.415404e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.520171e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.432915e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5526s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3771s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1749s for 8192 events => throughput is 4.68E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.4061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1265s for 8192 events => throughput is 6.47E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6037s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8506s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7524s for 81920 events => throughput is 4.67E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.5077s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2459s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2614s for 81920 events => throughput is 6.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.765396e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.651277e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.746828e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.666297e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0877s for 8192 events => throughput is 9.34E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2790s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0626s for 8192 events => throughput is 1.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720207E-002) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606505E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.7094s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8326s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8763s for 81920 events => throughput is 9.35E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.8640s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6238s for 81920 events => throughput is 1.31E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606505E-002) differ by less than 3E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.620733e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.358665e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.522409e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.361847e+05 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4555s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0783s for 8192 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.6252s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8413s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7834s for 81920 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.084541e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.078252e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4849s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1086s for 8192 events => throughput is 7.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.9263s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8463s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0793s for 81920 events => throughput is 7.59E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.311463e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.566979e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8289s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0040s + [COUNTERS] PROGRAM 
TOTAL : 0.5701s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5411s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.25E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0160s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3368s - [COUNTERS] Fortran Overhead ( 0 ) : 2.3090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [COUNTERS] PROGRAM TOTAL : 1.6277s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5196s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0930s for 81920 events => throughput is 8.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0150s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.131553e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.356390e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559107e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.872981e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.471514e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.608404e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.165070e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.583700e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.479703e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.634325e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.174058e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.187650e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.475036e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.594477e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.650749e+06 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.330851e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 0712f66370..1c90249307 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:01:30 +DATE: 2024-10-04_11:59:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4002s - [COUNTERS] Fortran MEs ( 1 ) : 0.3198s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4940s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s + [COUNTERS] Fortran MEs ( 1 ) : 0.2045s for 8192 events => throughput is 4.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6959s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3743s - [COUNTERS] Fortran MEs ( 1 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2742s + [COUNTERS] Fortran MEs ( 1 ) : 0.2043s for 8192 events => throughput is 4.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.0307s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8361s - [COUNTERS] Fortran MEs ( 1 ) : 3.1946s for 81920 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.2694s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2314s + [COUNTERS] Fortran MEs ( 1 ) : 2.0380s for 81920 events => throughput is 4.02E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474238393007253E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6986s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3752s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.5072s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2275s for 8192 events => throughput is 3.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474238393007253E-002) differ by less than 4E-4 (1.6693007842683016e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971543373778375E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.0691s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8467s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.2214s for 81920 events => throughput is 2.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 3.5027s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2415s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2607s for 81920 events => throughput is 3.62E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971543373778375E-002) differ by less than 4E-4 (1.8503863641328167e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.593703e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.755548e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.627112e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.745750e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474229018345096E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0979s for 8192 events => throughput is 8.37E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474229018345096E-002) differ by less than 4E-4 (2.8639171045785616e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971534528332888E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.8449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8535s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9910s for 81920 events => throughput is 8.27E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.9790s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2662s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7126s for 81920 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971534528332888E-002) differ by less than 4E-4 (2.9564602843645815e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.427461e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.164275e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.482393e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.183598e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474228627553363E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4277s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 8192 events => throughput is 2.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474228627553363E-002) differ by less than 4E-4 (2.9137158252812156e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971533958864222E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3085s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8569s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4513s for 81920 events => throughput is 1.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5739s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2456s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3281s for 81920 events => throughput is 2.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971533958864222E-002) differ by less than 4E-4 (3.027669184252346e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.850187e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.831580e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.2460s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4133s for 81920 events => throughput is 1.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.031384e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.558827e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.026199e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0525s for 8192 events => throughput is 1.56E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.580039e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3632s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8352s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5276s for 81920 events => throughput is 1.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.529803e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.472905e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474239700037612E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8139s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM 
TOTAL : 0.5888s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0148s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) +OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474239700037612E-002) differ by less than 4E-4 (1.5027454702831733e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971544830799671E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.2883s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 81920 events => throughput is 6.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.5716s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5221s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 81920 events => throughput is 2.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0150s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971544830799671E-002) differ by less than 4E-4 (1.6681939285501102e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.744391e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.189894e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.016184e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.062787e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.305157e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.607979e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.210328e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.880321e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.310024e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.571112e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.309757e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.534436e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.203011e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.728317e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.259858e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.018324e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 2b4351374c..3b278e2325 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:02:09 +DATE: 2024-10-04_11:59:34 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.7145s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3961s - [COUNTERS] Fortran MEs ( 1 ) : 0.3184s for 8192 events => throughput is 2.57E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4986s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s + [COUNTERS] Fortran MEs ( 1 ) : 0.2047s for 8192 events => throughput is 4.00E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.6928s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3731s - [COUNTERS] Fortran MEs ( 1 ) : 0.3196s for 8192 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.4854s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s + [COUNTERS] Fortran MEs ( 1 ) : 0.2043s for 8192 events => throughput is 4.01E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.0430s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8391s - [COUNTERS] Fortran MEs ( 1 ) : 3.2039s for 81920 events => throughput is 2.56E+04 events/s + [COUNTERS] PROGRAM TOTAL : 3.2703s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2288s + [COUNTERS] Fortran MEs ( 1 ) : 2.0416s for 81920 events => throughput is 4.01E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252272193679E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.7169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3393s for 8192 events => throughput is 2.41E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.5242s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2449s for 8192 events => throughput is 3.34E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252272193679E-002) differ by less than 2E-4 (9.93285631523122e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558933520065E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 5.3096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8692s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4393s for 81920 events => throughput is 2.38E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 3.6907s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2426s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4474s for 81920 events => throughput is 3.35E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558933520065E-002) differ by less than 2E-4 (9.527307387457995e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.514208e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.403968e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.505372e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.432539e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252220105081E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3773s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1730s for 8192 events => throughput is 4.74E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.4042s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1261s for 8192 events => throughput is 6.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252220105081E-002) differ by less than 2E-4 (9.269089717989232e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558934000736E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.5915s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8566s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.7343s for 81920 events => throughput is 4.72E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 2.5119s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2715s for 81920 events => throughput is 6.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558934000736E-002) differ by less than 2E-4 (9.53331791286871e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.812710e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.548717e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.847792e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.561659e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474252077403842E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4686s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3818s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 8192 events => throughput is 9.50E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0622s for 8192 events => throughput is 1.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) +OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252077403842E-002) differ by less than 2E-4 (7.450642991457812e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558777659491E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.6868s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8606s for 81920 events => throughput is 9.52E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.8704s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2513s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6188s for 81920 events => throughput is 1.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) +OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558777659491E-002) differ by less than 2E-4 (7.578357275050962e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.423883e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.369835e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.654532e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s +Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.372187e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.6024s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8349s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7669s for 81920 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.087750e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.111565e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,110 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8474251477062731E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4892s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1125s for 8192 events => throughput is 7.28E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.9498s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8348s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1145s for 81920 events => throughput is 7.35E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09) + [COUNTERS] PROGRAM TOTAL : 0.5730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5443s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0156s -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical +OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251477062731E-002) differ by less than 2E-4 (1.9952373087051e-10) -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.402526e+04 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.355239e+04 ) sec^-1 +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 32/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.8194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8081s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971558174786780E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.3053s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [COUNTERS] PROGRAM TOTAL : 1.6201s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0927s for 81920 events => throughput is 8.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0144s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) +OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558174786780E-002) differ by less than 2E-4 (3.976818874207311e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.136542e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.379182e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.566641e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.862774e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.411150e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.640817e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155971e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.619080e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.424302e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.599391e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.169194e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.190046e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.426806e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.571067e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.634141e+06 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.329072e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index ab6656c8c9..33c968e969 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:02:52 +DATE: 2024-10-04_12:00:06 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4509s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2925s - [COUNTERS] Fortran MEs ( 1 ) : 4.1585s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.8086s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s + [COUNTERS] Fortran MEs ( 1 ) : 2.5148s for 8192 events => throughput is 3.26E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.4534s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s - [COUNTERS] Fortran MEs ( 1 ) : 4.1719s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7299s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2214s + [COUNTERS] Fortran MEs ( 1 ) : 2.5085s for 8192 events => throughput is 3.27E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 43.7199s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9892s - [COUNTERS] Fortran MEs ( 1 ) : 41.7307s for 81920 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.5450s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3769s + [COUNTERS] Fortran MEs ( 1 ) : 25.1681s for 81920 events => throughput is 3.25E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.6017s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2867s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3065s for 8192 events => throughput is 1.90E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 3.3904s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2232s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1611s for 8192 events => throughput is 2.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283632] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.3130s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0098s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.2947s for 81920 events => throughput is 1.89E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 33.1597s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3978s + [COUNTERS] CudaCpp MEs ( 2 ) : 31.7557s for 81920 events => throughput is 2.58E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283632) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.952909e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.681013e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.958655e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.677051e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926832] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.5865s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2840s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2977s for 8192 events => throughput is 3.57E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 1.7580s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2223s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.5326s for 8192 events => throughput is 5.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926832) differ by less than 3E-14 (1.2212453270876722e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283630] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 25.1319s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9980s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.1295s for 81920 events => throughput is 3.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0045s + [COUNTERS] PROGRAM TOTAL : 16.8796s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3725s + [COUNTERS] CudaCpp MEs ( 2 ) : 15.5040s for 81920 events => throughput is 5.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283630) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.678244e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.482381e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.697932e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.503997e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926854] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.2929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2902s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0004s for 8192 events => throughput is 8.19E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [COUNTERS] PROGRAM TOTAL : 0.9114s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6844s for 8192 events => throughput is 1.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926854) differ by less than 3E-14 (5.551115123125783e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283624] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.1225s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0172s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.1030s for 81920 events => throughput is 8.11E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [COUNTERS] PROGRAM TOTAL : 8.1097s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3682s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7400s for 81920 events => throughput is 1.22E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283624) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.443138e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.418128e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.1699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8818s for 8192 events => throughput is 9.29E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 10.9025s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0005s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.9000s for 81920 events => throughput is 9.20E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.551865e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.246344e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.487120e+03 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4032s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2864s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1141s for 8192 events => throughput is 7.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.251733e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.1691s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0018s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.1648s for 81920 events => throughput is 7.34E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.467628e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.444952e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7927s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7197s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 8192 events => throughput is 2.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0346s + [COUNTERS] PROGRAM 
TOTAL : 0.7017s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1103s for 8192 events => throughput is 7.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0989s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144849706926871) and hip (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930270975283644] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.7809s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4100s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3362s for 81920 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [COUNTERS] PROGRAM TOTAL : 2.8477s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6834s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0651s for 81920 events => throughput is 7.69E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0992s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930270975283644) differ by less than 3E-14 (8.881784197001252e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.147561e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.511967e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.353804e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.048767e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122777e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.810313e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.172118e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.859104e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) 
-p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120194e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.808984e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.166091e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.631150e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125549e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.811154e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.430424e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.829336e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 702a33cbc5..dc6ff47a1e 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:06:36 +DATE: 2024-10-04_12:03:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2872s - [COUNTERS] Fortran MEs ( 1 ) : 4.1635s for 8192 events => throughput is 1.97E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2198s + [COUNTERS] Fortran MEs ( 1 ) : 2.5158s for 8192 events => throughput is 3.26E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.4557s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2815s - [COUNTERS] Fortran MEs ( 1 ) : 4.1743s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7838s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2710s + [COUNTERS] Fortran MEs ( 1 ) : 2.5128s for 8192 events => throughput is 3.26E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 43.8607s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9951s - [COUNTERS] Fortran MEs ( 1 ) : 41.8656s for 81920 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.5652s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4342s + [COUNTERS] Fortran MEs ( 1 ) : 25.1310s for 81920 events => throughput is 3.26E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145004642682091] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.4956s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.1989s for 8192 events => throughput is 1.95E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s + [COUNTERS] PROGRAM TOTAL : 3.2930s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2219s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.0651s for 8192 events => throughput is 2.67E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33145004642682091) differ by less than 4E-4 (4.6745046844431926e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,39 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930342252742398] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 43.9267s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9929s - [COUNTERS] CudaCpp MEs ( 2 ) : 41.9257s for 81920 events => throughput is 1.95E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s + [COUNTERS] PROGRAM TOTAL : 32.1448s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3719s + [COUNTERS] CudaCpp MEs ( 2 ) : 30.7669s for 81920 events => throughput is 2.66E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930342252742398) differ by less than 4E-4 (3.405472335016313e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.014568e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.754667e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.012026e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.746206e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -205,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144996928807552] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4417s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2863s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1531s for 8192 events => throughput is 7.10E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [COUNTERS] PROGRAM TOTAL : 0.9935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2243s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7676s for 8192 events => throughput is 1.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144996928807552) differ by less than 4E-4 (4.441772461838411e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930338466143997] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.6612s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0142s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.6444s for 81920 events => throughput is 7.04E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [COUNTERS] PROGRAM TOTAL : 9.1868s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4152s + [COUNTERS] CudaCpp MEs ( 2 ) : 7.7699s for 81920 events => throughput is 1.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930338466143997) differ by less than 4E-4 (3.2245574101974483e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.242904e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.096480e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.273553e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.097849e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -285,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145003508801812] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7933s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5062s for 8192 events => throughput is 1.62E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [COUNTERS] PROGRAM TOTAL : 0.5705s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2235s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3461s for 8192 events => throughput is 2.37E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33145003508801812) differ by less than 4E-4 (4.6402948361556895e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930341333868943] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.0232s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9972s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.0248s for 81920 events => throughput is 1.63E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [COUNTERS] PROGRAM TOTAL : 4.8598s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3988s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4602s for 81920 events => throughput is 2.37E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930341333868943) differ by less than 4E-4 (3.361570683813042e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.674381e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.661626e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2857s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4556s for 8192 events => throughput is 1.80E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 6.5552s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0045s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.5495s for 81920 events => throughput is 1.80E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.845679e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.423170e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.886817e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.8375s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5485s for 8192 events => throughput is 1.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.433338e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 7.5027s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9956s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5057s for 81920 events => throughput is 1.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.507537e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.510473e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -525,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 + [XSECTION] Cross section = 0.3315 [0.33145003134925582] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7206s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0245s + [COUNTERS] PROGRAM 
TOTAL : 0.6732s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1090s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) +OK! xsec from fortran (0.33144849706926871) and hip (0.33145003134925582) differ by less than 4E-4 (4.629014765944461e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -560,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930346901257960] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.6799s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4230s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2326s for 81920 events => throughput is 3.52E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0244s + [COUNTERS] PROGRAM TOTAL : 2.4415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6816s for 81920 events => throughput is 1.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1088s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930346901257960) differ by less than 4E-4 (3.6275676709163207e-06) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.113806e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155724e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.387968e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.933893e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.095200e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.956222e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.214105e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.074175e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) 
-p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.131792e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.958991e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.212764e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.277745e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.089022e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.955651e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.392733e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.769522e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 31826ff276..158ac94012 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-03_00:09:34 +DATE: 2024-10-04_12:06:30 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4565s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2843s - [COUNTERS] Fortran MEs ( 1 ) : 4.1722s for 8192 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7225s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2186s + [COUNTERS] Fortran MEs ( 1 ) : 2.5040s for 8192 events => throughput is 3.27E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.4257s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s - [COUNTERS] Fortran MEs ( 1 ) : 4.1447s for 8192 events => throughput is 1.98E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2519s + [COUNTERS] Fortran MEs ( 1 ) : 2.5070s for 8192 events => throughput is 3.27E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 43.7093s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9954s - [COUNTERS] Fortran MEs ( 1 ) : 41.7139s for 81920 events => throughput is 1.96E+03 events/s + [COUNTERS] PROGRAM TOTAL : 26.5426s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3923s + [COUNTERS] Fortran MEs ( 1 ) : 25.1503s for 81920 events => throughput is 3.26E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849880304822] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 4.7251s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4223s for 8192 events => throughput is 1.85E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s + [COUNTERS] PROGRAM TOTAL : 3.3978s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2217s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.1699s for 8192 events => throughput is 2.58E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849880304822) differ by less than 2E-4 (5.230916810816666e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271054111049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 45.7171s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9919s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.7167s for 81920 events => throughput is 1.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 33.2111s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3776s + [COUNTERS] CudaCpp MEs ( 2 ) : 31.8273s for 81920 events => throughput is 2.57E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271054111049) differ by less than 2E-4 (3.766192246956734e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.939321e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.680645e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.929194e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.679354e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849797290254] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.6038s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2841s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3150s for 8192 events => throughput is 3.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s + [COUNTERS] PROGRAM TOTAL : 1.7465s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.5180s for 8192 events => throughput is 5.40E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849797290254) differ by less than 2E-4 (2.7263173940639263e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271025983213] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 25.0226s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9994s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.0184s for 81920 events => throughput is 3.56E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s + [COUNTERS] PROGRAM TOTAL : 16.6740s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s + [COUNTERS] CudaCpp MEs ( 2 ) : 15.2946s for 81920 events => throughput is 5.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025983213) differ by less than 2E-4 (2.4223090200337083e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.656422e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.552453e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.652891e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.571907e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849773665513] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.2899s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2852s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0025s for 8192 events => throughput is 8.17E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [COUNTERS] PROGRAM TOTAL : 0.9076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2256s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6805s for 8192 events => throughput is 1.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) +OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849773665513) differ by less than 2E-4 (2.013544886381169e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271025898603] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 12.0048s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9956s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.0070s for 81920 events => throughput is 8.19E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [COUNTERS] PROGRAM TOTAL : 8.2499s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4024s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.8460s for 81920 events => throughput is 1.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) +OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025898603) differ by less than 2E-4 (2.418266698001048e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.425311e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.431412e+03 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.1516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2832s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8664s for 8192 events => throughput is 9.46E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 10.7114s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9864s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.7229s for 81920 events => throughput is 9.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.568644e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.235936e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.554146e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.229570e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 - [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.4200s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1258s for 8192 events => throughput is 7.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -104 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 104 - [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 - [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 13.2897s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9983s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2889s for 81920 events => throughput is 7.26E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.423207e+03 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.188334e+03 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144849679653593] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7990s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7259s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0383s for 8192 events => throughput is 2.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [COUNTERS] PROGRAM 
TOTAL : 0.7028s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4930s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1107s for 8192 events => throughput is 7.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0991s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) +OK! xsec from fortran (0.33144849706926871) and hip (0.33144849679653593) differ by less than 2E-4 (8.228511205743416e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930271009954451] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.7965s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4257s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3361s for 81920 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [COUNTERS] PROGRAM TOTAL : 2.8418s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6722s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0702s for 81920 events => throughput is 7.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0995s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) +OK! xsec from fortran (0.20930270975283627) and hip (0.20930271009954451) differ by less than 2E-4 (1.6564918325912004e-09) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.172471e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.499906e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.362761e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.007237e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126051e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.803764e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.165509e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.824759e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) 
-p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.125049e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.806219e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168356e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.604334e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.132671e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.802602e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.419294e+05 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.820495e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 1c9ef17ccc..5700ce5a9f 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - - -make USEBUILDDIR=1 BACKEND=cuda - -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cppsse4 - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + OMP_NUM_THREADS= -DATE: 2024-10-03_00:14:52 +DATE: 2024-10-04_12:11:04 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.3500s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5239s - [COUNTERS] Fortran MEs ( 1 ) : 100.8261s for 8192 events => throughput is 8.12E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.1538s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4743s + [COUNTERS] Fortran MEs ( 1 ) : 54.6795s for 8192 events => throughput is 1.50E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.9221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5152s - [COUNTERS] Fortran MEs ( 1 ) : 100.4069s for 8192 events => throughput is 8.16E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.1752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3889s + [COUNTERS] Fortran MEs ( 1 ) : 54.7863s for 8192 events => throughput is 1.50E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 998.1100s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3849s - [COUNTERS] Fortran MEs ( 1 ) : 993.7252s for 81920 events => throughput is 8.24E+01 events/s + [COUNTERS] PROGRAM TOTAL : 552.2886s + [COUNTERS] Fortran Overhead ( 0 ) : 4.0331s + [COUNTERS] Fortran MEs ( 1 ) : 548.2555s for 81920 events => throughput is 1.49E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +104,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729949E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 119.7848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5133s - [COUNTERS] CudaCpp MEs ( 2 ) : 119.0752s for 8192 events => throughput is 6.88E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1962s + [COUNTERS] PROGRAM TOTAL : 86.6739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4545s + [COUNTERS] CudaCpp MEs ( 2 ) : 86.0604s for 8192 events => throughput is 9.52E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1591s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729949E-007) differ by less than 3E-14 (3.552713678800501e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +139,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1194.8842s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3319s - [COUNTERS] CudaCpp MEs ( 2 ) : 1190.3522s for 81920 events => throughput is 6.88E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2001s + [COUNTERS] PROGRAM TOTAL : 867.1055s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7996s + [COUNTERS] CudaCpp MEs ( 2 ) : 864.1713s for 81920 events => throughput is 9.48E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1346s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.974801e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.195599e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.902621e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.199200e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +184,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729943E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 62.0110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5249s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.3838s for 8192 events => throughput is 1.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1022s + [COUNTERS] PROGRAM TOTAL : 43.8186s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4341s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.2592s for 8192 events => throughput is 1.89E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1253s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729943E-007) differ by less than 3E-14 (3.3306690738754696e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +219,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333069E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 616.2779s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3647s - [COUNTERS] CudaCpp MEs ( 2 ) : 611.8092s for 81920 events => throughput is 1.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1039s + [COUNTERS] PROGRAM TOTAL : 434.4773s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8275s + [COUNTERS] CudaCpp MEs ( 2 ) : 431.5815s for 81920 events => throughput is 1.90E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0683s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333069E-007) differ by less than 3E-14 (1.3322676295501878e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.632598e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.297706e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.628468e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.357210e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +264,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729933E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.8684s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5085s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.3140s for 8192 events => throughput is 2.89E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0459s + [COUNTERS] PROGRAM TOTAL : 20.1425s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4087s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.6505s for 8192 events => throughput is 4.17E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0833s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729933E-007) differ by less than 3E-14 (2.886579864025407e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,309 +299,45 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 284.5568s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3064s - [COUNTERS] CudaCpp MEs ( 2 ) : 280.2035s for 81920 events => throughput is 2.92E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s + [COUNTERS] PROGRAM TOTAL : 200.9873s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7857s + [COUNTERS] CudaCpp MEs ( 2 ) : 198.1703s for 81920 events => throughput is 4.13E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0313s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.517015e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.538692e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.2889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5134s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.7360s for 8192 events => throughput is 3.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 254.5108s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3262s - [COUNTERS] CudaCpp MEs ( 2 ) : 250.1446s for 81920 events => throughput is 3.27E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0399s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.062937e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.068720e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.8525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5118s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.2957s for 8192 events => throughput is 3.37E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0449s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 250.4117s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3538s - [COUNTERS] CudaCpp MEs ( 2 ) : 246.0095s for 81920 events => throughput is 3.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0485s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.630906e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.148582e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.609231e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 3.2173s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0360s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1013s for 8192 events => throughput is 7.44E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0800s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 16.7881s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8408s - [COUNTERS] CudaCpp MEs ( 2 ) : 10.8652s for 81920 events => throughput is 7.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0822s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.474483e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.239436e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.257821e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.542937e+03 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.224358e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.204472e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.425016e+03 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.260076e+03 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.246009e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 4235e6c48d..b90b1d8d16 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda - - - -make USEBUILDDIR=1 BACKEND=cppnone - -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. 
+ OMP_NUM_THREADS= -DATE: 2024-10-03_01:30:56 +DATE: 2024-10-04_12:53:49 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 101.4851s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s - [COUNTERS] Fortran MEs ( 1 ) : 100.9648s for 8192 events => throughput is 8.11E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.1920s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3538s + [COUNTERS] Fortran MEs ( 1 ) : 54.8381s for 8192 events => throughput is 1.49E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.7472s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5235s - [COUNTERS] Fortran MEs ( 1 ) : 100.2237s for 8192 events => throughput is 8.17E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.1638s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s + [COUNTERS] Fortran MEs ( 1 ) : 54.7760s for 8192 events => throughput is 1.50E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1009.1613s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4219s - [COUNTERS] Fortran MEs ( 1 ) : 1004.7394s for 81920 events => throughput is 8.15E+01 events/s + [COUNTERS] PROGRAM TOTAL : 552.3796s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8778s + [COUNTERS] Fortran MEs ( 1 ) : 549.5018s for 81920 events => throughput is 1.49E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +104,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575308139230432E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 110.1880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5092s - [COUNTERS] CudaCpp MEs ( 2 ) : 109.4957s for 8192 events => throughput is 7.48E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1831s + [COUNTERS] PROGRAM TOTAL : 89.4764s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4248s + [COUNTERS] CudaCpp MEs ( 2 ) : 88.8225s for 8192 events => throughput is 9.22E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2290s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575308139230432E-007) differ by less than 4E-4 (0.0001395002856556804) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,39 +140,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846099389242361E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1102.6591s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3176s - [COUNTERS] CudaCpp MEs ( 2 ) : 1098.1619s for 81920 events => throughput is 7.46E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1796s + [COUNTERS] PROGRAM TOTAL : 895.8954s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8367s + [COUNTERS] CudaCpp MEs ( 2 ) : 892.9121s for 81920 events => throughput is 9.17E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1466s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846099389242361E-007) differ by less than 4E-4 (0.00014187637267237818) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.906901e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.094534e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.884410e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.098895e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -206,25 +186,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575303913232094E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 27.5604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5117s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.0033s for 8192 events => throughput is 3.03E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0454s + [COUNTERS] PROGRAM TOTAL : 20.9041s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4832s + [COUNTERS] CudaCpp MEs ( 2 ) : 20.3319s for 8192 events => throughput is 4.03E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0890s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575303913232094E-007) differ by less than 4E-4 (0.00013932100537483727) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -242,39 +222,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846096068245575E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 271.4748s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3092s - [COUNTERS] CudaCpp MEs ( 2 ) : 267.1201s for 81920 events => throughput is 3.07E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0455s + [COUNTERS] PROGRAM TOTAL : 204.8498s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8168s + [COUNTERS] CudaCpp MEs ( 2 ) : 202.0002s for 81920 events => throughput is 4.06E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0328s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846096068245575E-007) differ by less than 4E-4 (0.00014173098820635666) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.509205e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.940133e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.514230e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.860175e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -288,25 +268,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575304434295576E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 14.2097s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5091s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.6782s for 8192 events => throughput is 5.99E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0224s + [COUNTERS] PROGRAM TOTAL : 10.2208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3848s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.7269s for 8192 events => throughput is 8.42E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1090s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575304434295576E-007) differ by less than 4E-4 (0.0001393431105436438) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -324,314 +304,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2846087407964351E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 143.5363s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3592s - [COUNTERS] CudaCpp MEs ( 2 ) : 139.1540s for 81920 events => throughput is 5.89E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0230s + [COUNTERS] PROGRAM TOTAL : 101.2826s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8628s + [COUNTERS] CudaCpp MEs ( 2 ) : 98.4042s for 81920 events => throughput is 8.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0156s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846087407964351E-007) differ by less than 4E-4 (0.00014135186397323807) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.841559e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.933769e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 12.8982s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5095s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.3688s for 8192 events => throughput is 6.62E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 130.1707s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3403s - [COUNTERS] CudaCpp MEs ( 2 ) : 125.8089s for 81920 events => throughput is 6.51E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0214s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.983770e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.944370e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 12.5708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5217s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.0269s for 8192 events => throughput is 6.81E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0222s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 124.0846s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3219s - [COUNTERS] CudaCpp MEs ( 2 ) : 119.7399s for 81920 events => throughput is 6.84E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0228s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.302945e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.030804e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.303967e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.1905s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5428s for 8192 events => throughput is 1.51E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5684s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 10.7124s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8151s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.3406s for 81920 events => throughput is 1.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5567s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.518595e+04 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.518521e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.124721e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.157002e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.133696e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.047420e+03 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.149769e+04 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.138034e+04 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016595e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index cd5c681c8c..6e71297983 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,21 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - -make USEBUILDDIR=1 BACKEND=cuda - - - -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 - -make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' - -make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. + +make: Nothing to be done for 'all'. 
+ OMP_NUM_THREADS= -DATE: 2024-10-03_02:29:14 +DATE: 2024-10-04_13:30:16 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +29,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.0620s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5189s - [COUNTERS] Fortran MEs ( 1 ) : 99.5431s for 8192 events => throughput is 8.23E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.2559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3532s + [COUNTERS] Fortran MEs ( 1 ) : 54.9027s for 8192 events => throughput is 1.49E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +54,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.3451s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5174s - [COUNTERS] Fortran MEs ( 1 ) : 99.8277s for 8192 events => throughput is 8.21E+01 events/s + [COUNTERS] PROGRAM TOTAL : 55.1771s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s + [COUNTERS] Fortran MEs ( 1 ) : 54.7651s for 8192 events => throughput is 1.50E+02 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +79,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1003.8857s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4277s - [COUNTERS] Fortran MEs ( 1 ) : 999.4580s for 81920 events => throughput is 8.20E+01 events/s + [COUNTERS] PROGRAM TOTAL : 551.6162s + [COUNTERS] Fortran Overhead ( 0 ) : 2.7869s + [COUNTERS] Fortran MEs ( 1 ) : 548.8293s for 81920 events => throughput is 1.49E+02 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +104,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019963403161E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 123.2681s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5157s - [COUNTERS] CudaCpp MEs ( 2 ) : 122.5482s for 8192 events => throughput is 6.68E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2041s + [COUNTERS] PROGRAM TOTAL : 86.7707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s + [COUNTERS] CudaCpp MEs ( 2 ) : 86.1409s for 8192 events => throughput is 9.51E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1952s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019963403161E-007) differ by less than 2E-4 (5.416306958494488e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +139,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858650293213E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 1239.6410s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3289s - [COUNTERS] CudaCpp MEs ( 2 ) : 1235.1064s for 81920 events => throughput is 6.63E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2057s + [COUNTERS] PROGRAM TOTAL : 868.4026s + [COUNTERS] Fortran Overhead ( 0 ) : 2.8203s + [COUNTERS] CudaCpp MEs ( 2 ) : 865.4484s for 81920 events => throughput is 9.47E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1339s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858650293213E-007) differ by less than 2E-4 (5.3828717039294816e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.864466e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.193941e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.890596e+01 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.189969e+02 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +184,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019985761424E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 61.9882s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5115s - [COUNTERS] CudaCpp MEs ( 2 ) : 61.3746s for 8192 events => throughput is 1.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1021s + [COUNTERS] PROGRAM TOTAL : 42.2548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s + [COUNTERS] CudaCpp MEs ( 2 ) : 41.8184s for 8192 events => throughput is 1.96E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0649s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019985761424E-007) differ by less than 2E-4 (6.364815563486559e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +219,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858654239918E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 618.7847s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3324s - [COUNTERS] CudaCpp MEs ( 2 ) : 614.3530s for 81920 events => throughput is 1.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0993s + [COUNTERS] PROGRAM TOTAL : 426.7406s + [COUNTERS] Fortran Overhead ( 0 ) : 3.8760s + [COUNTERS] CudaCpp MEs ( 2 ) : 422.8001s for 81920 events => throughput is 1.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0645s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858654239918E-007) differ by less than 2E-4 (5.555647941690722e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.600496e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.472663e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.598870e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.481727e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +264,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572019990398792E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 27.3953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5156s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8357s for 8192 events => throughput is 3.05E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0440s + [COUNTERS] PROGRAM TOTAL : 25.1693s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9111s + [COUNTERS] CudaCpp MEs ( 2 ) : 19.0002s for 8192 events => throughput is 4.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 4.2579s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) +OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019990398792E-007) differ by less than 2E-4 (6.5615473054947415e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,309 +299,45 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842858652988808E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 270.5862s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3334s - [COUNTERS] CudaCpp MEs ( 2 ) : 266.2094s for 81920 events => throughput is 3.08E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0434s + [COUNTERS] PROGRAM TOTAL : 193.2577s + [COUNTERS] Fortran Overhead ( 0 ) : 3.0250s + [COUNTERS] CudaCpp MEs ( 2 ) : 190.2033s for 81920 events => throughput is 4.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0294s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) +OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858652988808E-007) differ by less than 2E-4 (5.500877753306099e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.729666e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.712586e+02 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.1058s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5083s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.5601s for 8192 events => throughput is 3.48E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0374s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 238.9805s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3052s - [COUNTERS] CudaCpp MEs ( 2 ) : 234.6373s for 81920 events => throughput is 3.49E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0380s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.313097e+02 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.298085e+02 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 24.6954s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5093s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.1424s for 8192 events => throughput is 3.39E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0437s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 245.9606s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3057s - [COUNTERS] CudaCpp MEs ( 2 ) : 241.6115s for 81920 events => throughput is 3.39E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0435s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.675482e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.509905e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.688823e+02 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' -DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.8142s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8779s for 8192 events => throughput is 9.33E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8804s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 128/128 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 14.3181s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8117s - [COUNTERS] CudaCpp MEs ( 2 ) : 8.6324s for 81920 events => throughput is 9.49E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8741s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical - -*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.385803e+03 ) sec^-1 - -*** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.083008e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106276e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX) -p 512 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157843e+04 ) sec^-1 - -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.105164e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.466414e+02 ) sec^-1 -*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.108864e+04 ) sec^-1 +*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.109773e+04 ) sec^-1 +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** -Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.669145e+03 ) sec^-1 +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index b69bdf2fc8..200d2a01cc 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering 
directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-03_00:13:18 +DATE: 2024-10-04_12:10:01 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4425s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4368s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3919s - [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3329s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2862s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8238s - [COUNTERS] Fortran MEs ( 1 ) : 0.6897s for 81920 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7102s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2439s + [COUNTERS] Fortran MEs ( 1 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 8192 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.3440s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0557s for 8192 events => throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575784] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7507s for 81920 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.8088s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2508s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 81920 events => throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575784) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104333e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.501528e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.103333e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.500779e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456874] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4348s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3244s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2926s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456874) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2944s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8727s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4211s for 81920 events => throughput is 1.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5574s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3130s for 81920 events => throughput is 2.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575781) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.906811e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.595848e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965411e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.601905e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4283s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0238s for 8192 events => throughput is 3.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3104s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575775] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0830s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8442s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 81920 events => throughput is 3.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.4143s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1681s for 81920 events => throughput is 4.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575775) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.370259e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.032176e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.306992e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4190s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.080030e+05 ) sec^-1 -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1007s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8793s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2209s for 81920 events => throughput is 3.71E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.662156e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.791971e+05 ) sec^-1 +*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.5758s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5577s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0104s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (0.20313701704456871) and hip (0.20313701704456871) differ by less than 3E-14 (0.0) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,149 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1576s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8414s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3157s for 81920 events => throughput is 2.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + [COUNTERS] PROGRAM TOTAL : 1.5727s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5219s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0402s for 81920 events => throughput is 2.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0106s -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.479668e+05 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.507584e+05 ) sec^-1 +OK! xsec from fortran (0.21095771376575781) and hip (0.21095771376575781) differ by less than 3E-14 (0.0) -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8445s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.56E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2812s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2704s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 81920 events => throughput is 8.39E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.052839e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.050073e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.425419e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.958312e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.341421e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.477111e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.151138e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.794526e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.326674e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.464201e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.296661e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.794149e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.336891e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.430799e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653723e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.197375e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index ef9be9efc8..f0273e55a1 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-03_00:13:50 +DATE: 2024-10-04_12:10:22 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4359s - [COUNTERS] Fortran MEs ( 1 ) : 0.0679s for 8192 events => throughput is 1.21E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3658s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4646s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3955s - [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5081s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8204s - [COUNTERS] Fortran MEs ( 1 ) : 0.6877s for 81920 events => throughput is 1.19E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7102s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2435s + [COUNTERS] Fortran MEs ( 1 ) : 0.4667s for 81920 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313702859087712] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3432s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0508s for 8192 events => throughput is 1.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313702859087712) differ by less than 4E-4 (5.6840001816382824e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095770771365008] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5534s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8451s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7077s for 81920 events => throughput is 1.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 1.7678s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2609s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5066s for 81920 events => throughput is 1.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095770771365008) differ by less than 4E-4 (2.86887245071199e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157236e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.685173e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.172561e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.679429e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313700465139972] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4213s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3138s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0205s for 8192 events => throughput is 4.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700465139972) differ by less than 4E-4 (6.100891492000216e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095768752291760] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1080s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8479s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2597s for 81920 events => throughput is 3.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5581s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3601s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1979s for 81920 events => throughput is 4.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768752291760) differ by less than 4E-4 (1.2439858076973564e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.049325e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.149490e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.028245e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.120908e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313700354235445] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3116s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 8192 events => throughput is 7.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700354235445) differ by less than 4E-4 (6.646850714275843e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095768538537163] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.9551s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8278s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 81920 events => throughput is 6.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.4400s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1028s for 81920 events => throughput is 7.97E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768538537163) differ by less than 4E-4 (1.3453116110007102e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.240683e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.044738e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.282933e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.240258e+05 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4123s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.9645s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8463s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1178s for 81920 events => throughput is 6.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.681108e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.800809e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4128s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0059s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8483s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1573s for 81920 events => throughput is 5.21E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.857587e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.809270e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313702542257728] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8343s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0009s + [COUNTERS] PROGRAM TOTAL : 0.6091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0116s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) +OK! xsec from fortran (0.20313701704456871) and hip (0.20313702542257728) differ by less than 4E-4 (4.1243140680435886e-08) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095770853284573] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2771s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.6569s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6329s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 81920 events => throughput is 6.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0109s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) +OK! xsec from fortran (0.21095771376575781) and hip (0.21095770853284573) differ by less than 4E-4 (2.48055024298921e-08) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.194095e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.483989e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.453243e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.415900e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.153983e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.291731e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.705356e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.272401e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.151283e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.300164e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.697710e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.331894e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.773293e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.155572e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.223076e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.467689e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index eaa612a29b..1f173fb3cf 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-03_00:14:20 +DATE: 2024-10-04_12:10:43 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.5085s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4389s - [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3650s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s + [COUNTERS] Fortran MEs ( 1 ) : 0.0471s for 8192 events => throughput is 1.74E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4620s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3921s - [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3363s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s + [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.5215s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8261s - [COUNTERS] Fortran MEs ( 1 ) : 0.6954s for 81920 events => throughput is 1.18E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.7140s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2460s + [COUNTERS] Fortran MEs ( 1 ) : 0.4680s for 81920 events => throughput is 1.75E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4810s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4047s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.3484s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2922s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0558s for 8192 events => throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.6118s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8599s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7512s for 81920 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.8043s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2480s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5558s for 81920 events => throughput is 1.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.104505e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.486112e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.100300e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.508546e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3990s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2479s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4040s for 81920 events => throughput is 2.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5619s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2439s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3177s for 81920 events => throughput is 2.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.946818e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.594398e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.967726e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.583395e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701710149187] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.3099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0163s for 8192 events => throughput is 5.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) +OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701710149187) differ by less than 2E-4 (2.8022051345999444e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771374576316] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.0815s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2376s for 81920 events => throughput is 3.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2484s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 81920 events => throughput is 5.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) +OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771374576316) differ by less than 2E-4 (9.478029472376193e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431109e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.419345e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4156s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1079s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8862s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2213s for 81920 events => throughput is 3.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.868620e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.158200e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.922480e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' -DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 - [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.4329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3984s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.40E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.176426e+05 ) sec^-1 -OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 - [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.1810s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8478s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3327s for 81920 events => throughput is 2.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.411172e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.413465e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313701710728185] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.8355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0011s + [COUNTERS] PROGRAM TOTAL : 0.5973s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5787s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0109s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) +OK! xsec from fortran (0.20313701704456871) and hip (0.20313701710728185) differ by less than 2E-4 (3.087232691711961e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095771372611694] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 2.2766s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2655s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.22E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 1.5742s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5233s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0402s for 81920 events => throughput is 2.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0107s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) +OK! xsec from fortran (0.21095771376575781) and hip (0.21095771372611694) differ by less than 2E-4 (1.8790913269839393e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.015948e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.055221e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.328513e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.924559e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.335551e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.490650e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.198409e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.821752e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 
128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.343564e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.537152e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.282279e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.812009e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.337961e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.450255e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.656673e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.274935e+05 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index a6c1729b94..46f4c2db0c 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:45:28 +DATE: 2024-10-04_14:13:12 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9406s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8948s - [COUNTERS] Fortran MEs ( 1 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3108s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2785s + [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3963s - [COUNTERS] Fortran MEs ( 1 ) : 0.0462s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8811s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8489s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.9883s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5220s - [COUNTERS] Fortran MEs ( 1 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.6334s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3116s + [COUNTERS] Fortran MEs ( 1 ) : 0.3217s for 81920 events => throughput is 2.55E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755334] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4465s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3954s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0506s for 8192 events => throughput is 1.62E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.9361s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9003s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755334) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865325] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0112s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5194s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4914s for 81920 events => throughput is 1.67E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.7401s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3838s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3561s for 81920 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865325) differ by less than 3E-14 (1.1102230246251565e-14) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.689411e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.303016e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.699143e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.356917e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755347] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4240s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0267s for 8192 events => throughput is 3.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.7265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755347) differ by less than 3E-14 (8.881784197001252e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865338] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7904s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5236s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2664s for 81920 events => throughput is 3.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.5576s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3402s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2172s for 81920 events => throughput is 3.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865338) differ by less than 3E-14 (1.0436096431476471e-14) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.025435e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.905511e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.992417e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.063650e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755325] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4119s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0159s for 8192 events => throughput is 5.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6423s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6308s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755325) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.6859s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5201s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 81920 events => throughput is 4.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.3083s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1955s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1126s for 81920 events => throughput is 7.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865552) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.043958e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.982138e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.51E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.6925s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5412s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1509s for 81920 events => throughput is 5.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.422693e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.441710e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.463064e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.483571e+05 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7497s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5314s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2179s for 81920 events => throughput is 3.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.594328e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.651571e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755356] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8424s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8384s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM 
TOTAL : 0.9220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9070s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0160081479755330) and hip (2.0160081479755356) differ by less than 3E-14 (1.3322676295501878e-15) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865352] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.9702s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 4.4975s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4693s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 81920 events => throughput is 4.43E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0098s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (2.0336713375865552) and hip (2.0336713375865352) differ by less than 3E-14 (9.880984919163893e-15) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.955075e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.431899e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.400755e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.357175e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.826601e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.490479e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.117685e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.228740e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) 
-p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.829763e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.486032e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.475228e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.607853e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.836271e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.477472e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.541450e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.529467e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index ab10ba65ee..fb2002923f 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:45:56 +DATE: 2024-10-04_14:13:51 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8867s - [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9225s + [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4488s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4019s - [COUNTERS] Fortran MEs ( 1 ) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6395s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s + [COUNTERS] Fortran MEs ( 1 ) : 0.0322s for 8192 events => throughput is 2.55E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.9841s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5204s - [COUNTERS] Fortran MEs ( 1 ) : 0.4638s for 81920 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.4604s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1387s + [COUNTERS] Fortran MEs ( 1 ) : 0.3217s for 81920 events => throughput is 2.55E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,34 +124,34 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160406546722180] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.4519s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4050s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0466s for 8192 events => throughput is 1.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.6404s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160406546722180) differ by less than 4E-4 (1.61242883456314e-05) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02 0.54771239790E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002893E+02 0.63925016162E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762567893E+01 0.62166723101E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499550E+02 0.71320499550E+02 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239731E+02 0.54771239731E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002962E+02 0.63925016178E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762568567E+01 0.62166723103E+02 0.47000000000E+01 0. -1. 
< < 0 0.12500099E+03 < 0 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index f07c5f8fb7..4d77d149f7 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:46:02 +DATE: 2024-10-04_14:14:00 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9413s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8957s - [COUNTERS] Fortran MEs ( 1 ) : 0.0456s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1257s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0933s + [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4467s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4006s - [COUNTERS] Fortran MEs ( 1 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6437s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6116s + [COUNTERS] Fortran MEs ( 1 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0497s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5663s - [COUNTERS] Fortran MEs ( 1 ) : 0.4834s for 81920 events => throughput is 1.69E+05 events/s + [COUNTERS] PROGRAM TOTAL : 4.4523s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1304s + [COUNTERS] Fortran MEs ( 1 ) : 0.3220s for 81920 events => throughput is 2.54E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453460] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4441s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3939s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6429s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 8192 events => throughput is 2.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453460) differ by less than 2E-4 (2.4042468904639236e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,25 +160,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200616] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 2.0264s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5298s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4961s for 81920 events => throughput is 1.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.4982s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3556s for 81920 events => throughput is 2.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200616) differ by less than 2E-4 (2.297987178323524e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -187,15 +187,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.571027e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.260726e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.590282e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.291412e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,25 +209,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453469] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4241s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3968s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6568s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453469) differ by less than 2E-4 (2.4042469348728446e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -245,25 +245,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200620] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7845s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5165s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2676s for 81920 events => throughput is 3.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.3653s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1576s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2075s for 81920 events => throughput is 3.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200620) differ by less than 2E-4 (2.2979872005279844e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -272,15 +272,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.828390e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.881285e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.883903e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.024699e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,25 +294,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081962974865] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4160s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.84E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.6276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) +OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081962974865) differ by less than 2E-4 (2.3969126017320264e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -330,25 +330,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713836598834] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.6964s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1667s for 81920 events => throughput is 4.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.2440s + [COUNTERS] Fortran Overhead ( 0 ) : 4.1333s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 81920 events => throughput is 7.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) +OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713836598834) differ by less than 2E-4 (2.2655247899905362e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -357,102 +357,23 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.810097e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.263617e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.776953e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.697282e+05 ) sec^-1 -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.6608s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5101s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1503s for 81920 events => throughput is 5.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.113673e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.135155e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -464,31 +385,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081483021464] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 8192 events => throughput is 3.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.9033s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8885s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08) +OK! xsec from fortran (2.0160081479755330) and hip (2.0160081483021464) differ by less than 2E-4 (1.6200996100224074e-10) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -500,153 +420,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713380111582] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.7604s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5294s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2307s for 81920 events => throughput is 3.55E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 4.5065s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 81920 events => throughput is 4.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08) +OK! xsec from fortran (2.0336713375865552) and hip (2.0336713380111582) differ by less than 2E-4 (2.0878654360956261e-10) -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.151070e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.343164e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' -DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 - [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8378s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 - [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 1.9761s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9663s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.928935e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.436985e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.339519e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.357929e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.817995e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.491674e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.148245e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.313248e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) 
-p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.818249e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.496904e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.450546e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.622089e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.807173e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.483481e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.482355e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.597049e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index 892b3fd5e1..cd23937ee4 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:49:14 +DATE: 2024-10-04_14:16:23 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3507s - [COUNTERS] Fortran MEs ( 1 ) : 2.2283s for 8192 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8274s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3917s + [COUNTERS] Fortran MEs ( 1 ) : 1.4357s for 8192 events => throughput is 5.71E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s - [COUNTERS] Fortran MEs ( 1 ) : 2.2383s for 8192 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6752s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s + [COUNTERS] Fortran MEs ( 1 ) : 1.4138s for 8192 events => throughput is 5.79E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 24.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0276s - [COUNTERS] Fortran MEs ( 1 ) : 22.3535s for 81920 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.7401s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4278s + [COUNTERS] Fortran MEs ( 1 ) : 14.3123s for 81920 events => throughput is 5.72E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728557E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7630s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3551s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4030s for 8192 events => throughput is 3.41E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 1.9585s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.6618s for 8192 events => throughput is 4.93E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0033s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728557E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898222E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 26.1710s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0362s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.1298s for 81920 events => throughput is 3.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [COUNTERS] PROGRAM TOTAL : 19.0374s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4244s + [COUNTERS] CudaCpp MEs ( 2 ) : 17.6097s for 81920 events => throughput is 4.65E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0033s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898222E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.559366e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.543929e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.558371e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.559679e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3630s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2607s for 8192 events => throughput is 6.50E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [COUNTERS] PROGRAM TOTAL : 1.3005s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3388s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9597s for 8192 events => throughput is 8.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728536E-007) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898275E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 14.5841s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0288s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.5525s for 81920 events => throughput is 6.53E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 11.0954s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5152s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5781s for 81920 events => throughput is 8.55E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898275E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.776735e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.885187e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.761919e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.823104e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728525E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9120s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5544s for 8192 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.7435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2974s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4451s for 8192 events => throughput is 1.84E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728525E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898233E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.5522s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0125s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5382s for 81920 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 6.0265s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5817s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4437s for 81920 events => throughput is 1.84E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898233E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.525780e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.523425e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3590s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5061s for 8192 events => throughput is 1.62E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.9898s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0215s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9669s for 81920 events => throughput is 1.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.714953e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.954606e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.727026e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9876s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3520s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6339s for 8192 events => throughput is 1.29E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.045230e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.3820s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0136s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.3668s for 81920 events => throughput is 1.29E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.309804e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.309629e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728514E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8331s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7937s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 8192 events => throughput is 4.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s + [COUNTERS] PROGRAM TOTAL : 0.8018s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6803s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0647s for 8192 events => throughput is 1.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0569s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610362728514E-007) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6470s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4512s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1762s for 81920 events => throughput is 4.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0196s + [COUNTERS] PROGRAM TOTAL : 2.2955s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6338s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6060s for 81920 events => throughput is 1.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0557s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926582898244E-007) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.238235e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.285923e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.533678e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.807814e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.854781e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.813672e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.206482e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.210704e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.790740e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.821036e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.229997e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.262482e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.764026e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.814985e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.687249e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.225752e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 1da536828f..a6801e5689 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:51:30 +DATE: 2024-10-04_14:18:29 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3484s - [COUNTERS] Fortran MEs ( 1 ) : 2.2248s for 8192 events => throughput is 3.68E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.7715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2563s + [COUNTERS] Fortran MEs ( 1 ) : 1.5152s for 8192 events => throughput is 5.41E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5858s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3527s - [COUNTERS] Fortran MEs ( 1 ) : 2.2331s for 8192 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.6636s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2590s + [COUNTERS] Fortran MEs ( 1 ) : 1.4046s for 8192 events => throughput is 5.83E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 24.3640s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0153s - [COUNTERS] Fortran MEs ( 1 ) : 22.3487s for 81920 events => throughput is 3.67E+03 events/s + [COUNTERS] PROGRAM TOTAL : 15.2868s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3433s + [COUNTERS] Fortran MEs ( 1 ) : 13.9435s for 81920 events => throughput is 5.88E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381684214474469E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7241s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3605s for 8192 events => throughput is 3.47E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 1.8552s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2751s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.5760s for 8192 events => throughput is 5.20E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0040s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381684214474469E-007) differ by less than 4E-4 (9.668786189465095e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542976447681378E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 25.6088s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0243s - [COUNTERS] CudaCpp MEs ( 2 ) : 23.5796s for 81920 events => throughput is 3.47E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 18.4227s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4162s + [COUNTERS] CudaCpp MEs ( 2 ) : 17.0033s for 81920 events => throughput is 4.82E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542976447681378E-007) differ by less than 4E-4 (6.514616746056134e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.595330e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.678196e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.592962e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.691049e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381673102586798E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0090s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6499s for 8192 events => throughput is 1.26E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.8144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5007s for 8192 events => throughput is 1.64E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381673102586798E-007) differ by less than 4E-4 (8.214000459805249e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542965612263376E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.5774s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0289s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.5470s for 81920 events => throughput is 1.25E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 6.4975s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9690s for 81920 events => throughput is 1.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542965612263376E-007) differ by less than 4E-4 (5.09901657563816e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.276959e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.691506e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.272430e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.683782e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381674937970992E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6541s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3627s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2905s for 8192 events => throughput is 2.82E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 0.5370s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3041s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2322s for 8192 events => throughput is 3.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381674937970992E-007) differ by less than 4E-4 (8.454291831050398e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542993199513089E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 4.8460s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0109s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8342s for 81920 events => throughput is 2.89E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [COUNTERS] PROGRAM TOTAL : 3.8381s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5389s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2986s for 81920 events => throughput is 3.56E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542993199513089E-007) differ by less than 4E-4 (8.703170601975785e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.994182e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.988531e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3544s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2560s for 8192 events => throughput is 3.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 4.5717s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0031s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5679s for 81920 events => throughput is 3.19E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.282515e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.666190e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.307160e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.650647e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3268s for 8192 events => throughput is 2.51E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.2685s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0219s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.2456s for 81920 events => throughput is 2.52E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.530666e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.550885e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381687553340853E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8332s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7964s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 8192 events => throughput is 4.18E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s + [COUNTERS] PROGRAM TOTAL : 0.7076s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6167s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0544s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381687553340853E-007) differ by less than 4E-4 (1.0105915801972287e-06) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6543007309341497E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6217s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1593s for 81920 events => throughput is 5.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s + [COUNTERS] PROGRAM TOTAL : 2.3731s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9820s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3363s for 81920 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0547s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6543007309341497E-007) differ by less than 4E-4 (1.0546558233404113e-06) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.242479e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.332012e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.443260e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.661724e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.299498e+06 ) sec^-1 
+Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.665894e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323299e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.497446e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.300630e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.664462e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.333556e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.326834e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.292961e+06 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.632827e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.657294e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.430627e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index bec5746083..de2ab0c200 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:53:23 +DATE: 2024-10-04_14:20:09 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5908s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3522s - [COUNTERS] Fortran MEs ( 1 ) : 2.2386s for 8192 events => throughput is 3.66E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8730s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s + [COUNTERS] Fortran MEs ( 1 ) : 1.5860s for 8192 events => throughput is 5.17E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5989s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3536s - [COUNTERS] Fortran MEs ( 1 ) : 2.2453s for 8192 events => throughput is 3.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 1.8889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2903s + [COUNTERS] Fortran MEs ( 1 ) : 1.5986s for 8192 events => throughput is 5.12E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 24.4959s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0436s - [COUNTERS] Fortran MEs ( 1 ) : 22.4523s for 81920 events => throughput is 3.65E+03 events/s + [COUNTERS] PROGRAM TOTAL : 17.5690s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5059s + [COUNTERS] Fortran MEs ( 1 ) : 16.0631s for 81920 events => throughput is 5.10E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608764955570E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7880s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3539s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4293s for 8192 events => throughput is 3.37E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s + [COUNTERS] PROGRAM TOTAL : 2.1883s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3189s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8656s for 8192 events => throughput is 4.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608764955570E-007) differ by less than 2E-4 (2.0918293763827478e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925018181723E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 26.4223s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0309s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.3863s for 81920 events => throughput is 3.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [COUNTERS] PROGRAM TOTAL : 20.1819s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5129s + [COUNTERS] CudaCpp MEs ( 2 ) : 18.6654s for 81920 events => throughput is 4.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925018181723E-007) differ by less than 2E-4 (2.0442339820903044e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.446996e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.595880e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474680e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.584557e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608686521537E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6449s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2736s for 8192 events => throughput is 6.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [COUNTERS] PROGRAM TOTAL : 1.2548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9338s for 8192 events => throughput is 8.77E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608686521537E-007) differ by less than 2E-4 (2.194516446341055e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542924921991233E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 14.5911s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0528s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.5358s for 81920 events => throughput is 6.53E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [COUNTERS] PROGRAM TOTAL : 11.0387s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5040s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.5327s for 81920 events => throughput is 8.59E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542924921991233E-007) differ by less than 2E-4 (2.1699026797605825e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.890337e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.975960e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.047724e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.924543e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200382E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5497s for 8192 events => throughput is 1.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [COUNTERS] PROGRAM TOTAL : 0.7407s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4394s for 8192 events => throughput is 1.86E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608826200382E-007) differ by less than 2E-4 (2.0116467158715068e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010384E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 7.5428s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0133s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5280s for 81920 events => throughput is 1.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 5.9216s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5372s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3833s for 81920 events => throughput is 1.87E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) +OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925056010384E-007) differ by less than 2E-4 (1.9948124929669575e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.522237e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.532222e+04 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8421s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3547s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4860s for 8192 events => throughput is 1.69E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.9661s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0341s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9307s for 81920 events => throughput is 1.66E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.729032e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.918930e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.749814e+04 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9989s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3556s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6416s for 8192 events => throughput is 1.28E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.920051e+04 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 64/64 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 8.5360s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0345s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.4998s for 81920 events => throughput is 1.26E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.215280e+04 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.243322e+04 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590265E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8391s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7995s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + [COUNTERS] PROGRAM TOTAL : 0.7957s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0643s for 8192 events => throughput is 1.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0582s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) +OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610372590265E-007) differ by less than 2E-4 (1.2911138824733825e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926581386322E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.6398s - [COUNTERS] Fortran Overhead ( 0 ) : 2.4432s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1768s for 81920 events => throughput is 4.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s + [COUNTERS] PROGRAM TOTAL : 2.5113s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8511s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6018s for 81920 events => throughput is 1.36E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0583s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) +OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926581386322E-007) differ by less than 2E-4 (1.9752643964920935e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.207682e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.288285e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.525707e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.774779e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.691636e+05 ) sec^-1 
+Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.826375e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.175385e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.219655e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.807412e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.826503e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.198574e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.240808e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.764129e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.834278e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.676928e+05 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.256536e+04 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 60dc72a754..deec2c77b7 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-03_03:47:55 +DATE: 2024-10-04_14:15:33 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6671s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6586s - [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5894s + [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3126s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6469s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5651s - [COUNTERS] Fortran MEs ( 1 ) : 0.0818s for 81920 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.1278s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0728s + [COUNTERS] Fortran MEs ( 1 ) : 0.0550s for 81920 events => throughput is 1.49E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4228s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4141s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3345s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6561s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5734s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1437s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0774s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0661s for 81920 events => throughput is 1.24E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.009926e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.260666e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.018079e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.270880e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4135s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3163s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 81920 events => throughput is 1.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1171s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0846s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 81920 events => throughput is 2.53E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910107e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.596609e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.994596e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.878355e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4110s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.34E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207294] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5890s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5608s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 81920 events => throughput is 2.95E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.1406s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1209s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 81920 events => throughput is 4.19E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207294) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.069685e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.335637e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5957s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5688s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 81920 events => throughput is 3.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.242302e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.731833e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.339112e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.966724e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4182s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6593s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6273s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 81920 events => throughput is 2.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.878268e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.129733e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8486s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8450s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6393s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449452343426120) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0157s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0075s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.07E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.4474s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4306s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 81920 events => throughput is 9.93E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.231093e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.585053e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.601013e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.572497e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.487661e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.485723e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.923690e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.923782e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.473112e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.300903e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.866909e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.447522e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.525381e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.419170e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.225466e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.906699e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 40e043e263..50a82667f2 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-03_03:48:21 +DATE: 2024-10-04_14:15:50 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6695s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6613s - [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4774s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4720s + [COUNTERS] Fortran MEs ( 1 ) : 0.0054s for 8192 events => throughput is 1.53E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s + [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6449s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5625s - [COUNTERS] Fortran MEs ( 1 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.1755s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1195s + [COUNTERS] Fortran MEs ( 1 ) : 0.0560s for 81920 events => throughput is 1.46E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446601800423] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4150s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4064s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446601800423) differ by less than 4E-4 (1.8856252759213987e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305123565710] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6513s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 81920 events => throughput is 1.02E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1684s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1100s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0582s for 81920 events => throughput is 1.41E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305123565710) differ by less than 4E-4 (1.8208556928911435e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.019290e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.375769e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.014848e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.585958e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446481959741] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4189s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4158s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3561s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446481959741) differ by less than 4E-4 (1.924982528933583e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305120129920] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6073s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 81920 events => throughput is 3.04E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1309s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1120s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 81920 events => throughput is 4.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305120129920) differ by less than 4E-4 (1.8219731212631984e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.109785e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.723661e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.217004e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.560242e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446707997274] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4105s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3433s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446707997274) differ by less than 4E-4 (1.8507488352970114e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305200358782] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5830s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5644s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 81920 events => throughput is 4.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1239s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1108s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 81920 events => throughput is 6.30E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305200358782) differ by less than 4E-4 (1.7958801523665358e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.670603e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.314284e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4168s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4147s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5952s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 81920 events => throughput is 4.65E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.288976e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.778488e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.607414e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4090s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4065s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.101338e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6041s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5837s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 81920 events => throughput is 4.08E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07) +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.367008e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.649645e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446257236112] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8469s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8433s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6004s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5879s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.61E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449446257236112) differ by less than 4E-4 (1.998784719958735e-07) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747304644712603] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0191s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0109s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 81920 events => throughput is 1.07E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.5346s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5164s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747304644712603) differ by less than 4E-4 (1.9765939007765354e-07) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.218779e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.740887e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.617092e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.697485e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.685309e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.603233e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.178696e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.026789e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.647881e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.675123e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.181500e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.065938e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.209271e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.798785e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.664226e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.393472e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index b038a0f2b5..4928c87d09 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip - -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-03_03:48:47 +DATE: 2024-10-04_14:16:07 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6842s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6761s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4594s + [COUNTERS] Fortran MEs ( 1 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4107s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4027s - [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.3020s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2966s + [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.54E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6320s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5508s - [COUNTERS] Fortran MEs ( 1 ) : 0.0812s for 81920 events => throughput is 1.01E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.1192s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0653s + [COUNTERS] Fortran MEs ( 1 ) : 0.0539s for 81920 events => throughput is 1.52E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4181s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3183s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3115s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.23E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6484s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5654s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0827s for 81920 events => throughput is 9.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0670s for 81920 events => throughput is 1.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.742532e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.238434e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.900727e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323227e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4122s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6172s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5740s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 81920 events => throughput is 1.91E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1111s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0799s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 81920 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907045e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.904271e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.053191e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.962408e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453251780906] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4068s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.36E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453251780906) differ by less than 2E-4 (2.98315638858071e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,200 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311628550072] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5959s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.0984s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0797s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 81920 events => throughput is 4.42E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311628550072) differ by less than 2E-4 (2.947714006218405e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.250656e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.571390e+06 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4139s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4111s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) - -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5862s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5600s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 81920 events => throughput is 3.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.389797e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.086035e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.566056e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.298072e+06 ) sec^-1 -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' -DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 - [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.4209s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4175s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -2 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 4/4 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 2 - [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 - [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.6022s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5735s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 81920 events => throughput is 2.89E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.948781e+06 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.293600e+06 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -524,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452360186241] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.8489s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8453s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 
0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6589s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) +OK! xsec from fortran (0.30449452343426120) and hip (0.30449452360186241) differ by less than 2E-4 (5.504243727472158e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,69 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310720557375] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 2.0195s - [COUNTERS] Fortran Overhead ( 0 ) : 2.0110s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.03E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 1.3634s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) +OK! xsec from fortran (0.30747310722207288) and hip (0.30747310720557375) differ by less than 2E-4 (5.366040944920769e-11) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.199891e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.657161e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.433914e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.738885e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.488918e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.485774e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.917817e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 7.658400e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.520898e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.505719e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.908547e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.902088e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.523903e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.407832e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.248078e+08 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.871336e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index 43f72c2971..abd64571cc 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx - -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:46:31 +DATE: 2024-10-04_14:14:36 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7848s - [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.6834s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6554s + [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4457s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3340s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3061s + [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9606s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5512s - [COUNTERS] Fortran MEs ( 1 ) : 0.4094s for 81920 events => throughput is 2.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3180s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0370s + [COUNTERS] Fortran MEs ( 1 ) : 0.2810s for 81920 events => throughput is 2.92E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4058s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3736s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846950) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9704s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5382s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4318s for 81920 events => throughput is 1.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.5164s + [COUNTERS] Fortran Overhead ( 0 ) : 1.2023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3139s for 81920 events => throughput is 2.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.872222e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.678942e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.933993e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.090787e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4300s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4050s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3336s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.8024s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5570s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2451s for 81920 events => throughput is 3.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.2596s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0744s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1850s for 81920 events => throughput is 4.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.358555e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.526680e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.302135e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.549791e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4222s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3237s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,110 +329,36 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6905s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5406s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1495s for 81920 events => throughput is 5.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1545s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0467s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 81920 events => throughput is 7.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.319188e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.827533e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.338203e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.895903e+05 ) sec^-1 -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6848s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5455s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1390s for 81920 events => throughput is 5.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.862092e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.876638e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,9 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -454,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4377s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4136s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 
0.5972s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5827s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846950) and hip (44.641911695846950) differ by less than 3E-14 (0.0) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,89 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7587s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2138s for 81920 events => throughput is 3.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.605581e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.598085e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! 
Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8431s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s - -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9864s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9768s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 81920 events => throughput is 9.29E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 1.3346s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 81920 events => throughput is 4.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.473264592444679) and hip (44.473264592444679) differ by less than 3E-14 (0.0) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.051887e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.490585e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.338765e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.422055e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.900263e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.729175e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.747078e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
1.118093e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.880130e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.733332e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.996058e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.908378e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898528e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.723052e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.732046e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.108063e+06 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index ed21485c0d..e7d3a0ecd8 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone - +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:46:59 +DATE: 2024-10-04_14:14:57 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8170s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7765s - [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5798s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5518s + [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4109s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3297s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3015s + [COUNTERS] Fortran MEs ( 1 ) : 0.0282s for 8192 events => throughput is 2.90E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9363s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5280s - [COUNTERS] Fortran MEs ( 1 ) : 0.4083s for 81920 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3339s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0465s + [COUNTERS] Fortran MEs ( 1 ) : 0.2874s for 81920 events => throughput is 2.85E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641905397892330] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4267s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 8192 events => throughput is 2.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641905397892330) differ by less than 4E-4 (1.4107717127842534e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473258075185306] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9462s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4048s for 81920 events => throughput is 2.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.3244s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0461s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2781s for 81920 events => throughput is 2.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473258075185306) differ by less than 4E-4 (1.465433093761348e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996508e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.992620e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.026268e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.033930e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902617887730] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4206s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641902617887730) differ by less than 4E-4 (2.0335059314202653e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255619824656] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7742s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1728s for 81920 events => throughput is 4.74E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0519s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1296s for 81920 events => throughput is 6.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473255619824656) differ by less than 4E-4 (2.0175312298587045e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.652600e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.559069e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.627498e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.495969e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902771385062] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4300s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4204s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.3141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) +OK! xsec from fortran (44.641911695846950) and cpp (44.641902771385062) differ by less than 4E-4 (1.9991218003223565e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255186065366] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6340s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5463s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0874s for 81920 events => throughput is 9.37E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.1050s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0407s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0642s for 81920 events => throughput is 1.28E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) +OK! xsec from fortran (44.473264592444679) and cpp (44.473255186065366) differ by less than 4E-4 (2.1150638251921094e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.151357e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.271021e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.236288e+05 ) sec^-1 - -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4108s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4024s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.01E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.195524e+06 ) sec^-1 -OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6230s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5395s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0833s for 81920 events => throughput is 9.84E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.906699e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.013538e+06 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,30 +370,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641905467548966] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4217s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4097s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 6.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] 
PROGRAM TOTAL : 0.6207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6070s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.72E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07) +OK! xsec from fortran (44.641911695846950) and hip (44.641905467548966) differ by less than 4E-4 (1.3951682953372568e-07) -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical +OK! events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -479,149 +405,67 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473257658055729] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5584s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1131s for 81920 events => throughput is 7.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + [COUNTERS] PROGRAM TOTAL : 1.4238s + [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.876658e+05 ) sec^-1 +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.015744e+05 ) sec^-1 +OK! xsec from fortran (44.473264592444679) and hip (44.473257658055729) differ by less than 4E-4 (1.5592264279717938e-07) -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8376s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) - -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical - -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9938s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9852s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s - -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) - -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.110624e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.787408e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.475370e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.796448e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.948933e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.375567e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.365477e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
3.746471e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.962850e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.552641e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.369650e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.832561e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.634262e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.125599e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.047453e+07 ) sec^-1 - -*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.244604e+07 ) sec^-1 TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 14485e47cc..18c795f9eb 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' 
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-03_03:47:26 +DATE: 2024-10-04_14:15:14 -On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7844s - [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5725s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5438s + [COUNTERS] Fortran MEs ( 1 ) : 0.0287s for 8192 events => throughput is 2.85E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s - [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.3510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3222s + [COUNTERS] Fortran MEs ( 1 ) : 0.0289s for 8192 events => throughput is 2.84E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/4 +Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9467s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5401s - [COUNTERS] Fortran MEs ( 1 ) : 0.4066s for 81920 events => throughput is 2.01E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.3713s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0790s + [COUNTERS] Fortran MEs ( 1 ) : 0.2923s for 81920 events => throughput is 2.80E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404211] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4496s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4055s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3654s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3313s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.42E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404211) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.9926s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4388s for 81920 events => throughput is 1.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 1.4488s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1187s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3299s for 81920 events => throughput is 2.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.887986e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.326687e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.905348e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.596950e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404225] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4296s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.3417s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3211s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404225) differ by less than 2E-4 (2.7833872318083763e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265850735238] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7806s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 81920 events => throughput is 3.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.2982s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1949s for 81920 events => throughput is 4.20E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735238) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.451620e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.481485e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.293351e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.531906e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912966309015] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4011s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.46E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.3413s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3305s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) +OK! xsec from fortran (44.641911695846950) and cpp (44.641912966309015) differ by less than 2E-4 (2.8458952971988083e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,120 +319,46 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265882025295] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7047s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5555s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1489s for 81920 events => throughput is 5.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 1.1605s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1056s for 81920 events => throughput is 7.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) +OK! xsec from fortran (44.473264592444679) and cpp (44.473265882025295) differ by less than 2E-4 (2.899676077028346e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.376926e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.023285e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.203989e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.043041e+05 ) sec^-1 -*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4271s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4126s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s +*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** -OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) +*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** -*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.6888s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5521s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1364s for 81920 events => throughput is 6.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) - -*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.987404e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.016756e+05 ) sec^-1 - -*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -444,89 +370,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 - [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4227s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4021s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 8192 events => throughput is 4.06E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) - -*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical - -*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -81920 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) --------------------- -Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 - [NGOODHEL] ngoodhel/ncomb = 16/16 - [XSECTION] VECSIZE_USED = 8192 - [XSECTION] MultiChannel = TRUE - [XSECTION] Configuration = 1 - [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 - [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.7481s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5428s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2049s for 81920 events => throughput is 4.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s - -*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** - -OK! 
xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) - -*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** - -OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical - -*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.718921e+05 ) sec^-1 - -*** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666481e+05 ) sec^-1 - -*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** --------------------- -CUDACPP_RUNTIME_FBRIDGEMODE = (not set) -CUDACPP_RUNTIME_VECSIZEUSED = 8192 --------------------- -8192 1 1 ! Number of events and max and min iterations -0.000001 ! Accuracy (ignored because max iterations = min iterations) -0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) -1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) -0 ! Helicity Sum/event 0=exact -1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
--------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' -DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -534,20 +380,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.8408s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8369s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.6047s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5899s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s -*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) +OK! xsec from fortran (44.641911695846950) and hip (44.641911674225568) differ by less than 2E-4 (4.843292433776014e-10) -*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical -*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -559,9 +405,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [OPENMPTH] omp_get_max_threads/nproc = 1/128 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -569,59 +415,57 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 2.0041s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9943s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 1.3396s + [COUNTERS] Fortran Overhead ( 0 ) : 1.3143s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 81920 events => throughput is 4.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s -*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) +OK! 
xsec from fortran (44.473264592444679) and hip (44.473264587763374) differ by less than 2E-4 (1.0526113314313079e-10) -*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical +OK! events.lhe.hip.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.043134e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.486525e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.399822e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.410712e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.879175e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.737904e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 
16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.546320e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.127465e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.879320e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.738843e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.922385e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.899456e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.873490e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.713230e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** 
-Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754671e+07 ) sec^-1 - -*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.120015e+06 ) sec^-1 TEST COMPLETED From 0524cd1e772098c2faf280dd4b3c07f0f1193918 Mon Sep 17 00:00:00 2001 From: Andrea Valassi Date: Fri, 4 Oct 2024 18:13:26 +0300 Subject: [PATCH 11/11] [amd] ** COMPLETE AMD** go back to tput/tmad test logs from itscrd90 Revert "[amd] rerun 30 tmad tests on LUMI worker node (small-g 72h) - no change (heft fails #833, skip ggttggg #933)" This reverts commit 07c2a535b2714fc44495fcfc8ecaa72e4f06038e. Revert "[amd] rerun 96 tput builds and tests on LUMI worker node (small-g 72h) with the workaround for HIP FPEs #1011 - now all tests succeed" This reverts commit 0ec8c1cb53c1197d416ccee4ceda5bd1f19d519f. 
--- .../log_eemumu_mad_d_inl0_hrd0.txt | 444 +++++++++++----- .../log_eemumu_mad_f_inl0_hrd0.txt | 466 +++++++++++------ .../log_eemumu_mad_m_inl0_hrd0.txt | 458 ++++++++++------ .../log_ggtt_mad_d_inl0_hrd0.txt | 450 ++++++++++------ .../log_ggtt_mad_f_inl0_hrd0.txt | 460 ++++++++++------ .../log_ggtt_mad_m_inl0_hrd0.txt | 454 ++++++++++------ .../log_ggttg_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttg_mad_f_inl0_hrd0.txt | 464 +++++++++++------ .../log_ggttg_mad_m_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttgg_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_ggttgg_mad_f_inl0_hrd0.txt | 463 ++++++++++------ .../log_ggttgg_mad_m_inl0_hrd0.txt | 464 +++++++++++------ .../log_ggttggg_mad_d_inl0_hrd0.txt | 488 +++++++++++++---- .../log_ggttggg_mad_f_inl0_hrd0.txt | 492 ++++++++++++++---- .../log_ggttggg_mad_m_inl0_hrd0.txt | 488 +++++++++++++---- .../log_gqttq_mad_d_inl0_hrd0.txt | 466 +++++++++++------ .../log_gqttq_mad_f_inl0_hrd0.txt | 466 +++++++++++------ .../log_gqttq_mad_m_inl0_hrd0.txt | 466 +++++++++++------ .../log_heftggbb_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_heftggbb_mad_f_inl0_hrd0.txt | 100 ++-- .../log_heftggbb_mad_m_inl0_hrd0.txt | 474 +++++++++++------ .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 462 ++++++++++------ .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 464 +++++++++++------ .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 466 +++++++++++------ .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 448 ++++++++++------ .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 456 ++++++++++------ .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 458 ++++++++++------ .../log_susyggtt_mad_d_inl0_hrd0.txt | 456 ++++++++++------ .../log_susyggtt_mad_f_inl0_hrd0.txt | 462 ++++++++++------ .../log_susyggtt_mad_m_inl0_hrd0.txt | 456 ++++++++++------ .../log_eemumu_mad_d_inl0_hrd0.txt | 258 +++++---- .../log_eemumu_mad_d_inl0_hrd0_bridge.txt | 276 ++++++---- .../log_eemumu_mad_d_inl0_hrd0_common.txt | 244 ++++++--- .../log_eemumu_mad_d_inl0_hrd0_rmbhst.txt | 261 
++++++---- .../log_eemumu_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_eemumu_mad_d_inl1_hrd0.txt | 258 +++++---- .../log_eemumu_mad_d_inl1_hrd1.txt | 258 +++++---- .../log_eemumu_mad_f_inl0_hrd0.txt | 268 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_bridge.txt | 284 ++++++---- .../log_eemumu_mad_f_inl0_hrd0_common.txt | 254 +++++---- .../log_eemumu_mad_f_inl0_hrd0_rmbhst.txt | 269 ++++++---- .../log_eemumu_mad_f_inl0_hrd1.txt | 268 ++++++---- .../log_eemumu_mad_f_inl1_hrd0.txt | 268 ++++++---- .../log_eemumu_mad_f_inl1_hrd1.txt | 268 ++++++---- .../log_eemumu_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_eemumu_mad_m_inl0_hrd1.txt | 254 +++++---- .../log_ggtt_mad_d_inl0_hrd0.txt | 254 +++++---- .../log_ggtt_mad_d_inl0_hrd0_bridge.txt | 272 ++++++---- .../log_ggtt_mad_d_inl0_hrd0_common.txt | 240 ++++++--- .../log_ggtt_mad_d_inl0_hrd0_rmbhst.txt | 257 +++++---- .../log_ggtt_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_ggtt_mad_d_inl1_hrd0.txt | 254 +++++---- .../log_ggtt_mad_d_inl1_hrd1.txt | 254 +++++---- .../log_ggtt_mad_f_inl0_hrd0.txt | 272 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_bridge.txt | 290 +++++++---- .../log_ggtt_mad_f_inl0_hrd0_common.txt | 264 ++++++---- .../log_ggtt_mad_f_inl0_hrd0_rmbhst.txt | 275 ++++++---- .../log_ggtt_mad_f_inl0_hrd1.txt | 272 ++++++---- .../log_ggtt_mad_f_inl1_hrd0.txt | 272 ++++++---- .../log_ggtt_mad_f_inl1_hrd1.txt | 272 ++++++---- .../log_ggtt_mad_m_inl0_hrd0.txt | 258 +++++---- .../log_ggtt_mad_m_inl0_hrd1.txt | 258 +++++---- .../log_ggttg_mad_d_inl0_hrd0.txt | 293 +++++++---- .../log_ggttg_mad_d_inl0_hrd0_bridge.txt | 315 ++++++----- .../log_ggttg_mad_d_inl0_hrd1.txt | 293 +++++++---- .../log_ggttg_mad_f_inl0_hrd0.txt | 301 ++++++----- .../log_ggttg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----- .../log_ggttg_mad_f_inl0_hrd1.txt | 301 ++++++----- .../log_ggttg_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_ggttg_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0.txt | 285 ++++++---- 
.../log_ggttgg_mad_d_inl0_hrd0_bridge.txt | 307 +++++++---- .../log_ggttgg_mad_d_inl0_hrd0_common.txt | 269 ++++++---- .../log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt | 290 +++++++---- .../log_ggttgg_mad_d_inl0_hrd1.txt | 285 ++++++---- .../log_ggttgg_mad_d_inl1_hrd0.txt | 289 ++++++---- .../log_ggttgg_mad_d_inl1_hrd1.txt | 293 +++++++---- .../log_ggttgg_mad_f_inl0_hrd0.txt | 301 ++++++----- .../log_ggttgg_mad_f_inl0_hrd0_bridge.txt | 323 +++++++----- .../log_ggttgg_mad_f_inl0_hrd0_common.txt | 295 +++++++---- .../log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt | 306 ++++++----- .../log_ggttgg_mad_f_inl0_hrd1.txt | 299 +++++++---- .../log_ggttgg_mad_f_inl1_hrd0.txt | 297 +++++++---- .../log_ggttgg_mad_f_inl1_hrd1.txt | 297 +++++++---- .../log_ggttgg_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_ggttgg_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_ggttggg_mad_d_inl0_hrd0.txt | 237 ++++++--- .../log_ggttggg_mad_d_inl0_hrd0_bridge.txt | 261 +++++++--- .../log_ggttggg_mad_d_inl0_hrd1.txt | 237 ++++++--- .../log_ggttggg_mad_f_inl0_hrd0.txt | 251 ++++++--- .../log_ggttggg_mad_f_inl0_hrd0_bridge.txt | 275 +++++++--- .../log_ggttggg_mad_f_inl0_hrd1.txt | 251 ++++++--- .../log_ggttggg_mad_m_inl0_hrd0.txt | 233 ++++++--- .../log_ggttggg_mad_m_inl0_hrd1.txt | 233 ++++++--- .../log_gqttq_mad_d_inl0_hrd0.txt | 281 ++++++---- .../log_gqttq_mad_d_inl0_hrd0_bridge.txt | 303 +++++++---- .../log_gqttq_mad_d_inl0_hrd1.txt | 281 ++++++---- .../log_gqttq_mad_f_inl0_hrd0.txt | 297 +++++++---- .../log_gqttq_mad_f_inl0_hrd0_bridge.txt | 319 +++++++----- .../log_gqttq_mad_f_inl0_hrd1.txt | 297 +++++++---- .../log_gqttq_mad_m_inl0_hrd0.txt | 277 ++++++---- .../log_gqttq_mad_m_inl0_hrd1.txt | 277 ++++++---- .../log_heftggbb_mad_d_inl0_hrd0.txt | 254 +++++---- .../log_heftggbb_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_heftggbb_mad_f_inl0_hrd0.txt | 270 ++++++---- .../log_heftggbb_mad_f_inl0_hrd1.txt | 272 ++++++---- .../log_heftggbb_mad_m_inl0_hrd0.txt | 252 +++++---- .../log_heftggbb_mad_m_inl0_hrd1.txt | 
252 +++++---- .../log_smeftggtttt_mad_d_inl0_hrd0.txt | 281 ++++++---- .../log_smeftggtttt_mad_d_inl0_hrd1.txt | 281 ++++++---- .../log_smeftggtttt_mad_f_inl0_hrd0.txt | 301 ++++++----- .../log_smeftggtttt_mad_f_inl0_hrd1.txt | 301 ++++++----- .../log_smeftggtttt_mad_m_inl0_hrd0.txt | 281 ++++++---- .../log_smeftggtttt_mad_m_inl0_hrd1.txt | 281 ++++++---- .../log_susyggt1t1_mad_d_inl0_hrd0.txt | 250 +++++---- .../log_susyggt1t1_mad_d_inl0_hrd1.txt | 250 +++++---- .../log_susyggt1t1_mad_f_inl0_hrd0.txt | 262 ++++++---- .../log_susyggt1t1_mad_f_inl0_hrd1.txt | 262 ++++++---- .../log_susyggt1t1_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_susyggt1t1_mad_m_inl0_hrd1.txt | 254 +++++---- .../log_susyggtt_mad_d_inl0_hrd0.txt | 258 +++++---- .../log_susyggtt_mad_d_inl0_hrd1.txt | 254 +++++---- .../log_susyggtt_mad_f_inl0_hrd0.txt | 270 ++++++---- .../log_susyggtt_mad_f_inl0_hrd1.txt | 270 ++++++---- .../log_susyggtt_mad_m_inl0_hrd0.txt | 254 +++++---- .../log_susyggtt_mad_m_inl0_hrd1.txt | 254 +++++---- 126 files changed, 25905 insertions(+), 13816 deletions(-) diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index e5f1acd639..9b0b9f8c70 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-04_11:57:12 +DATE: 2024-10-02_23:58:28 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4787s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4734s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7338s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7265s + [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1354s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1301s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2177s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2099s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3495s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2965s - [COUNTERS] Fortran MEs ( 1 ) : 0.0530s for 81920 events => throughput is 1.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s + [COUNTERS] Fortran MEs ( 1 ) : 0.0732s for 81920 events => throughput is 1.12E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.48E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,10 +169,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3513s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0549s for 81920 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7098s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6394s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0701s for 81920 events => throughput is 1.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -183,14 +183,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.482917e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155936e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.528805e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.172560e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1387s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1349s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0037s for 8192 events => throughput is 2.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2151s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2107s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,9 +239,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -249,10 +249,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3310s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2967s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 81920 events => throughput is 2.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6961s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6516s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0441s for 81920 events => throughput is 1.86E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -263,14 +263,14 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.513769e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.918531e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.535871e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.020683e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,14 +294,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1370s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1345s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2148s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2112s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.44E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,10 +329,10 @@ DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3204s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2964s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0239s for 81920 events => throughput is 3.43E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6734s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0320s for 81920 events => throughput is 2.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** @@ -343,22 +343,96 @@ OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002 OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.709801e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.548719e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.792075e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.718686e+06 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2119s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2083s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6378s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 81920 events => throughput is 2.60E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.686657e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.772609e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,110 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.4096s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.56E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s + [COUNTERS] PROGRAM TOTAL : 0.2162s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2118s + [COUNTERS] CudaCpp MEs ( 2 ) : 
0.0042s for 8192 events => throughput is 1.97E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789448173971E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6858s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0380s for 81920 events => throughput is 2.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.112929e+06 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.169699e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! 
Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6439s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.5811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5641s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 81920 events => throughput is 8.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s + [COUNTERS] PROGRAM TOTAL : 1.0816s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0737s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 81920 events => throughput is 1.11E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103909519892E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.692916e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.312523e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.782692e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.728376e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.860215e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.551104e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.103935e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.941874e+08 ) sec^-1 *** 
EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.861582e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.534696e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118406e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.933441e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.829015e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.510361e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.606029e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.195345e+08 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index d284b6241b..05be9e9d6c 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-04_11:57:22 +DATE: 2024-10-02_23:58:46 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4645s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7495s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7422s + [COUNTERS] Fortran MEs ( 1 ) : 0.0073s for 8192 events => throughput is 1.13E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1380s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1326s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2166s + [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.05E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3567s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3034s - [COUNTERS] Fortran MEs ( 1 ) : 0.0532s for 81920 events => throughput is 1.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7353s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6581s + [COUNTERS] Fortran MEs ( 1 ) : 0.0772s for 81920 events => throughput is 1.06E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432776035199060E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1374s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1327s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2248s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2172s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432776035199060E-002) differ by less than 4E-4 (1.4511057155885965e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711090687154856E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3430s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0455s for 81920 events => throughput is 1.80E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7074s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6411s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0661s for 81920 events => throughput is 1.24E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711090687154856E-002) differ by less than 4E-4 (1.4417409099909406e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.920464e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.208440e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.927577e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.231118e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432793908398633E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2102s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793908398633E-002) differ by less than 4E-4 (4.8253706141920816e-08) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711108423277371E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3010s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.15E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6669s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6399s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 81920 events => throughput is 3.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108423277371E-002) differ by less than 4E-4 (4.921713170347175e-08) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.453098e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.117302e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.598556e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.242056e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432793820194981E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1336s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.53E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.24E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432793820194981E-002) differ by less than 4E-4 (4.729945990433748e-08) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711108407854763E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2969s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 81920 events => throughput is 4.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6658s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.32E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711108407854763E-002) differ by less than 4E-4 (4.904896666602099e-08) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.628047e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.403974e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.601663e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6476s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 81920 events => throughput is 3.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.561752e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.051156e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.728317e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2123s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.00E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6740s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6481s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 81920 events => throughput is 3.19E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.431784e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.540493e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432778459280288E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.4133s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4014s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.71E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0071s + [COUNTERS] PROGRAM TOTAL : 
0.6457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6423s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.85E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432778459280288E-002) differ by less than 4E-4 (1.1888523265835005e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711093172690286E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.5755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5604s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 81920 events => throughput is 1.09E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s + [COUNTERS] PROGRAM TOTAL : 1.0769s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0691s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 81920 events => throughput is 1.13E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711093172690286E-002) differ by less than 4E-4 (1.1707229707891287e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.835558e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.450419e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.780130e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.716246e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.126928e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.468932e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.638837e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.284727e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.019568e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.811258e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.427394e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.220962e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.528018e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.347565e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.846143e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
1.807469e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 249ba624f2..ceb72487c4 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-10-04_11:57:33 +DATE: 2024-10-02_23:59:05 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.4948s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7127s + [COUNTERS] Fortran MEs ( 1 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x1_fortran > /tmp/valassia/output_eemumu_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/avalassi/output_eemumu_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1397s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1340s - [COUNTERS] Fortran MEs ( 1 ) : 0.0057s for 8192 events => throughput is 1.44E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.2141s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2067s + [COUNTERS] Fortran MEs ( 1 ) : 0.0074s for 8192 events => throughput is 1.10E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/valassia/output_eemumu_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/avalassi/output_eemumu_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_eemumu_x10_fortran > /tmp/v [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3525s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2992s - [COUNTERS] Fortran MEs ( 1 ) : 0.0534s for 81920 events => throughput is 1.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7093s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6362s + [COUNTERS] Fortran MEs ( 1 ) : 0.0731s for 81920 events => throughput is 1.12E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,14 +134,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1374s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0057s for 8192 events => throughput is 1.44E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2165s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2089s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3516s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2972s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0543s for 81920 events => throughput is 1.51E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.7136s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6421s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0712s for 81920 events => throughput is 1.15E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.513314e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.143586e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.609489e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.169403e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,14 +214,14 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1385s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.26E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2119s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103904317942E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3296s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2962s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0334s for 81920 events => throughput is 2.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6396s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0407s for 81920 events => throughput is 2.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317942E-002) differ by less than 2E-4 (5.672107228349432e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.484196e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.010636e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.644939e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.105629e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789444494401E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.1372s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.2123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and cpp (9.2432789444494401E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09171 [9.1711103899063479E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.3241s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3000s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 81920 events => throughput is 3.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.6720s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6400s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 81920 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063479E-002) differ by less than 2E-4 (1.1401468658078784e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.551066e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.589631e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.649265e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2135s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6783s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6471s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 81920 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.654351e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.728629e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.743225e+06 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' +DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2113s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) -*** (3-cuda) WARNING! 
SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0369s for 81920 events => throughput is 2.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.209789e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.334386e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x1_cudacpp > /tmp/valassia/output_eemumu_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x1_cudacpp > /tmp/avalassi/output_eemumu_x1_cudacpp' DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09243 [9.2432789437826984E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1589 events (found 1593 events) - [COUNTERS] PROGRAM TOTAL : 0.4332s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4198s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0053s for 8192 events => throughput is 1.56E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0082s + [COUNTERS] PROGRAM TOTAL : 0.6444s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6410s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.2432789448173971E-002) and hip (9.2432789437826984E-002) differ by less than 2E-4 (1.1194067894848558e-10) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_eemumu_x10_cudacpp > /tmp/valassia/output_eemumu_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 16/16 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 [UNWEIGHT] Wrote 1655 events (found 1660 events) - [COUNTERS] PROGRAM TOTAL : 0.6023s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5839s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.18E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s + [COUNTERS] PROGRAM TOTAL : 1.0867s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 81920 events => throughput is 1.12E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1711103909519892E-002) and hip (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) +OK! 
xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.703628e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.281389e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.701689e+06 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.611764e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.862061e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.513316e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 
1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.148327e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.841595e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.835643e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.527747e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.122290e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.926367e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.837124e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.529012e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = 
SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.604412e+07 ) sec^-1 +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.175131e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 4fdc427195..fcf8054bf9 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_11:57:44 +DATE: 2024-10-02_23:59:24 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7026s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6744s - [COUNTERS] Fortran MEs ( 1 ) : 0.0283s for 8192 events => throughput is 2.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8251s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7830s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3192s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2913s - [COUNTERS] Fortran MEs ( 1 ) : 0.0279s for 8192 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3999s + [COUNTERS] Fortran MEs ( 1 ) : 0.0397s for 8192 events => throughput is 2.06E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3222s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0423s - [COUNTERS] Fortran MEs ( 1 ) : 0.2799s for 81920 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9664s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5519s + [COUNTERS] Fortran MEs ( 1 ) : 0.4145s for 81920 events => throughput is 1.98E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3245s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 8192 events => throughput is 2.65E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4004s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268150] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3616s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3114s for 81920 events => throughput is 2.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9642s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5311s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4327s for 81920 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268150) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686383e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.924342e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.686277e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.859061e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611968034155] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3153s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2963s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0188s for 8192 events => throughput is 4.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4245s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3996s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034155) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268164] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2305s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0455s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1849s for 81920 events => throughput is 4.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7761s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5323s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2434s for 81920 events => throughput is 3.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268164) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.283365e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.358630e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.559050e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.362585e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,9 +284,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -294,10 +294,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3048s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.57E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4023s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1524s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0447s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 81920 events => throughput is 7.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6958s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5437s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1518s for 81920 events => throughput is 5.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.338241e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.300976e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.689330e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.342527e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4131s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.86E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6781s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5398s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1379s for 81920 events => throughput is 5.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.813432e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.843429e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +444,89 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0215s for 8192 events => throughput is 3.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7483s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2144s for 81920 events => throughput is 3.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.672595e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.764683e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +534,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.5974s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5835s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0056s for 8192 events => throughput is 1.47E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s + [COUNTERS] PROGRAM TOTAL : 0.8391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8354s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3589s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0166s for 81920 events => throughput is 4.93E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s + [COUNTERS] PROGRAM TOTAL : 1.9945s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9851s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and hip (47.144596232268178) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.531314e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.142986e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.422873e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.389230e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.710649e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.891641e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.082605e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.671813e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.711416e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.906867e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.861809e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.028190e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
1.688339e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.883975e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.996118e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.704910e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 84ba16449e..793d082383 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: 
Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' - - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_11:58:01 +DATE: 2024-10-02_23:59:52 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.5633s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5347s - [COUNTERS] Fortran MEs ( 1 ) : 0.0285s for 8192 events => throughput is 2.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8207s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7790s + [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3190s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s - [COUNTERS] Fortran MEs ( 1 ) : 0.0288s for 8192 events => throughput is 2.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4407s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3997s + [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0375s - [COUNTERS] Fortran MEs ( 1 ) : 0.2805s for 81920 events => throughput is 2.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9658s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5547s + [COUNTERS] Fortran MEs ( 1 ) : 0.4111s for 81920 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138605296829816] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3195s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2920s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0274s for 8192 events => throughput is 2.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138605296829816) differ by less than 4E-4 (1.4152313931869998e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144592003933589] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.4477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1735s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2740s for 81920 events => throughput is 2.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9768s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5659s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4106s for 81920 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144592003933589) differ by less than 4E-4 (8.968863673963767e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.062937e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.003295e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.091905e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.019987e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602746994408] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3060s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2928s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.26E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4185s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4008s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602746994408) differ by less than 4E-4 (1.956154279669775e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144589414828133] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1729s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0429s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1299s for 81920 events => throughput is 6.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.7577s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1732s for 81920 events => throughput is 4.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144589414828133) differ by less than 4E-4 (1.44607029572974e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.446430e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.698016e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.475352e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.733377e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138602995819163] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.2985s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4056s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3965s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.29E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138602995819163) differ by less than 4E-4 (1.9033685183522664e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144587555291501] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1141s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0499s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0641s for 81920 events => throughput is 1.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6291s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0883s for 81920 events => throughput is 9.28E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144587555291501) differ by less than 4E-4 (1.840502910077646e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.283651e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.052077e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.346209e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4054s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6191s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5357s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0831s for 81920 events => throughput is 9.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.778412e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.351786e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.841904e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4104s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3984s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 7.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6518s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5393s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1122s for 81920 events => throughput is 7.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.954474e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.797285e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138605197694872] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.5747s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.85E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0125s + [COUNTERS] PROGRAM TOTAL : 0.8408s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8373s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138605197694872) differ by less than 4E-4 (1.4362619105146024e-07) +OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144590142508306] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3226s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.03E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s + [COUNTERS] PROGRAM TOTAL : 1.9846s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.04E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and hip (47.144590142508306) differ by less than 4E-4 (1.2917195901795964e-07) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.937998e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.218541e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.882822e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.615186e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.641800e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.024967e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.950148e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.388814e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 
--bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.562820e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.001710e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.033595e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.373929e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.191502e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.703628e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.262245e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.093326e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 37fbe019f1..b1303dd832 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_11:58:17 +DATE: 2024-10-03_00:00:19 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.5755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5472s - [COUNTERS] Fortran MEs ( 1 ) : 0.0282s for 8192 events => throughput is 2.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7976s + [COUNTERS] Fortran MEs ( 1 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/valassia/output_ggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/avalassi/output_ggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x1_fortran > /tmp/vala [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3219s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2940s - [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4419s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4011s + [COUNTERS] Fortran MEs ( 1 ) : 0.0408s for 8192 events => throughput is 2.01E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggtt_x10_fortran > /tmp/valassia/output_ggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/avalassi/output_ggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232268157] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3169s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0373s - [COUNTERS] Fortran MEs ( 1 ) : 0.2796s for 81920 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9652s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5537s + [COUNTERS] Fortran MEs ( 1 ) : 0.4115s for 81920 events => throughput is 1.99E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3223s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2908s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0314s for 8192 events => throughput is 2.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3997s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0435s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3872s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3151s for 81920 events => throughput is 2.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9985s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5558s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4423s for 81920 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446513367086368e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.676606e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.844334e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.665448e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.882466e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2897s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 8192 events => throughput is 4.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4231s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3983s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597573367527] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.2271s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0426s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1844s for 81920 events => throughput is 4.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7964s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5540s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2421s for 81920 events => throughput is 3.38E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597573367527) differ by less than 2E-4 (2.8446512922997158e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.461113e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.366359e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.482033e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.389089e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138613336664328] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.3025s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2917s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.73E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4109s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and cpp (47.138613336664328) differ by less than 2E-4 (2.9034163517849265e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144597613828985] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.1475s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0421s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1053s for 81920 events => throughput is 7.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6933s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1491s for 81920 events => throughput is 5.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and cpp (47.144597613828985) differ by less than 2E-4 (2.9304754622927476e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.016996e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.398655e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.400566e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4102s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 8192 events => throughput is 6.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6744s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5361s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1380s for 81920 events => throughput is 5.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.941046e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.104743e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.965683e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3953s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7562s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2090s for 81920 events => throughput is 3.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.714345e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.833717e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x1_cudacpp > /tmp/valassia/output_ggtt_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.138611963547795] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.5770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5629s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0054s for 8192 events => throughput is 1.50E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 0.8403s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8366s + [COUNTERS] CudaCpp 
MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.75E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.138611968034162) and hip (47.138611963547795) differ by less than 2E-4 (9.517397980829401e-11) +OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggtt_x10_cudacpp > /tmp/valassia/output_ggtt_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.14 [47.144596232269080] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.3704s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3455s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 81920 events => throughput is 4.99E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 1.9861s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 81920 events => throughput is 9.38E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.144596232268157) and hip (47.144596232269080) differ by less than 2E-4 (1.9539925233402755e-14) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.577917e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.100732e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.490514e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.378501e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.342087e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.877553e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.130707e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.586294e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 
--bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.749737e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.878727e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.928388e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.988107e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.739049e+07 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.887451e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.133614e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.727351e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 2e40ef7bc3..46adcb615c 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-04_11:58:34 +DATE: 2024-10-03_00:00:48 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.5953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3908s - [COUNTERS] Fortran MEs ( 1 ) : 0.2045s for 8192 events => throughput is 4.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4118s + [COUNTERS] Fortran MEs ( 1 ) : 0.3309s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2714s - [COUNTERS] Fortran MEs ( 1 ) : 0.2052s for 8192 events => throughput is 3.99E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6960s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3754s + [COUNTERS] Fortran MEs ( 1 ) : 0.3206s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.2989s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2518s - [COUNTERS] Fortran MEs ( 1 ) : 2.0471s for 81920 events => throughput is 4.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0380s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8383s + [COUNTERS] Fortran MEs ( 1 ) : 3.1997s for 81920 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5286s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2780s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2499s for 8192 events => throughput is 3.28E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.7120s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3342s for 8192 events => throughput is 2.45E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.7677s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2400s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5271s for 81920 events => throughput is 3.24E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.2236s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8645s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3580s for 81920 events => throughput is 2.44E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.415404e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.533053e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432915e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.520171e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4061s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1265s for 8192 events => throughput is 6.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5526s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3771s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1749s for 8192 events => throughput is 4.68E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.5077s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2459s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2614s for 81920 events => throughput is 6.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 3.6037s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8506s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7524s for 81920 events => throughput is 4.67E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.651277e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.765396e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.666297e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.746828e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3419s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2790s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0626s for 8192 events => throughput is 1.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0877s for 8192 events => throughput is 9.34E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474251492720207E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606505E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8640s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6238s for 81920 events => throughput is 1.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.7094s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8326s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8763s for 81920 events => throughput is 9.35E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558171606505E-002) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.358665e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.620733e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.361847e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.522409e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4555s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0783s for 8192 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.6252s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8413s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7834s for 81920 events => throughput is 1.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.084541e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.078252e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4849s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1086s for 8192 events => throughput is 7.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! 
Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.9263s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8463s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0793s for 81920 events => throughput is 7.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.311463e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.566979e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720248E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5411s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.25E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0160s + [COUNTERS] PROGRAM TOTAL : 0.8289s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8165s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0040s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251492720248E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.6277s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5196s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0930s for 81920 events => throughput is 8.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0150s + [COUNTERS] PROGRAM TOTAL : 2.3368s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558171606491E-002) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.356390e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.131553e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.872981e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.559107e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.608404e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.471514e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.583700e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.165070e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 
4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.634325e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.479703e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.187650e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.174058e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.594477e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.475036e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.330851e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.650749e+06 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 1c90249307..0712f66370 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-04_11:59:06 +DATE: 2024-10-03_00:01:30 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.4940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s - [COUNTERS] Fortran MEs ( 1 ) : 0.2045s for 8192 events => throughput is 4.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4002s + [COUNTERS] Fortran MEs ( 1 ) : 0.3198s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2742s - [COUNTERS] Fortran MEs ( 1 ) : 0.2043s for 8192 events => throughput is 4.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6959s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3743s + [COUNTERS] Fortran MEs ( 1 ) : 0.3216s for 8192 events => throughput is 2.55E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.2694s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2314s - [COUNTERS] Fortran MEs ( 1 ) : 2.0380s for 81920 events => throughput is 4.02E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0307s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8361s + [COUNTERS] Fortran MEs ( 1 ) : 3.1946s for 81920 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474238393007253E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5072s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2792s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2275s for 8192 events => throughput is 3.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.6986s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3752s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3225s for 8192 events => throughput is 2.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474238393007253E-002) differ by less than 4E-4 (1.6693007842683016e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971543373778375E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.5027s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2415s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2607s for 81920 events => throughput is 3.62E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 5.0691s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8467s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2214s for 81920 events => throughput is 2.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971543373778375E-002) differ by less than 4E-4 (1.8503863641328167e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.755548e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.593703e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.745750e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.627112e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474229018345096E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3505s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3764s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0979s for 8192 events => throughput is 8.37E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474229018345096E-002) differ by less than 4E-4 (2.8639171045785616e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971534528332888E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.9790s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7126s for 81920 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.8449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9910s for 81920 events => throughput is 8.27E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971534528332888E-002) differ by less than 4E-4 (2.9564602843645815e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.164275e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.427461e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.183598e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.482393e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474228627553363E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3112s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2781s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 8192 events => throughput is 2.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4277s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3831s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0442s for 8192 events => throughput is 1.85E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474228627553363E-002) differ by less than 4E-4 (2.9137158252812156e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971533958864222E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.5739s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2456s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3281s for 81920 events => throughput is 2.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.3085s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8569s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4513s for 81920 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971533958864222E-002) differ by less than 4E-4 (3.027669184252346e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.558827e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.850187e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.831580e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4196s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3788s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0404s for 8192 events => throughput is 2.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.2460s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4133s for 81920 events => throughput is 1.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.031384e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.580039e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.026199e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4322s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3792s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0525s for 8192 events => throughput is 1.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3632s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8352s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5276s for 81920 events => throughput is 1.55E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.529803e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.472905e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474239700037612E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5888s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0070s for 8192 events => throughput is 1.17E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0148s + [COUNTERS] PROGRAM TOTAL : 0.8184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8139s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474239700037612E-002) differ by less than 4E-4 (1.5027454702831733e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971544830799671E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.5716s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5221s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0345s for 81920 events => throughput is 2.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0150s + [COUNTERS] PROGRAM TOTAL : 2.2883s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2747s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 81920 events => throughput is 6.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971544830799671E-002) differ by less than 4E-4 (1.6681939285501102e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.189894e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.744391e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.062787e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.016184e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.607979e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.305157e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.880321e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.210328e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 
4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.571112e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.310024e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.534436e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.309757e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.728317e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.203011e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.018324e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.259858e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index 3b278e2325..2b4351374c 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z 
-make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-10-04_11:59:34 +DATE: 2024-10-03_00:02:09 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.4986s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s - [COUNTERS] Fortran MEs ( 1 ) : 0.2047s for 8192 events => throughput is 4.00E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7145s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3961s + [COUNTERS] Fortran MEs ( 1 ) : 0.3184s for 8192 events => throughput is 2.57E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x1_fortran > /tmp/valassia/output_ggttg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/avalassi/output_ggttg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251492720207E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4854s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s - [COUNTERS] Fortran MEs ( 1 ) : 0.2043s for 8192 events => throughput is 4.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.6928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3731s + [COUNTERS] Fortran MEs ( 1 ) : 0.3196s for 8192 events => throughput is 2.56E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttg_x10_fortran > /tmp/valassia/output_ggttg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/avalassi/output_ggttg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558171606449E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.2703s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2288s - [COUNTERS] Fortran MEs ( 1 ) : 2.0416s for 81920 events => throughput is 4.01E+04 events/s + [COUNTERS] PROGRAM TOTAL : 5.0430s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8391s + [COUNTERS] Fortran MEs ( 1 ) : 3.2039s for 81920 events => throughput is 2.56E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252272193679E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5242s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2449s for 8192 events => throughput is 3.34E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 0.7169s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3393s for 8192 events => throughput is 2.41E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252272193679E-002) differ by less than 2E-4 (9.93285631523122e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558933520065E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 3.6907s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2426s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4474s for 81920 events => throughput is 3.35E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [COUNTERS] PROGRAM TOTAL : 5.3096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8692s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4393s for 81920 events => throughput is 2.38E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558933520065E-002) differ by less than 2E-4 (9.527307387457995e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.403968e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.514208e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.432539e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.505372e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252220105081E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.4042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1261s for 8192 events => throughput is 6.49E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.5510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3773s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1730s for 8192 events => throughput is 4.74E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252220105081E-002) differ by less than 2E-4 (9.269089717989232e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558934000736E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 2.5119s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2399s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2715s for 81920 events => throughput is 6.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 3.5915s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8566s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.7343s for 81920 events => throughput is 4.72E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558934000736E-002) differ by less than 2E-4 (9.53331791286871e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.548717e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.812710e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.561659e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.847792e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474252077403842E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.3412s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0622s for 8192 events => throughput is 1.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4686s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3818s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 8192 events => throughput is 9.50E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.8474251492720207E-002) and cpp (7.8474252077403842E-002) differ by less than 2E-4 (7.450642991457812e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558777659491E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.8704s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2513s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6188s for 81920 events => throughput is 1.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.6868s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8256s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8606s for 81920 events => throughput is 9.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and cpp (7.9971558777659491E-002) differ by less than 2E-4 (7.578357275050962e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.369835e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.423883e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.372187e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.654532e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3769s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0756s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.6024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8349s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7669s for 81920 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.087750e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.111565e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,110 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x1_cudacpp > /tmp/valassia/output_ggttg_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07847 [7.8474251477062731E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 [UNWEIGHT] Wrote 376 events (found 1358 events) - [COUNTERS] PROGRAM TOTAL : 0.5730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5443s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0131s for 8192 events => throughput is 6.23E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0156s + [COUNTERS] PROGRAM TOTAL 
: 0.4892s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1125s for 8192 events => throughput is 7.28E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.9498s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8348s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1145s for 81920 events => throughput is 7.35E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09) -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! xsec from fortran (7.8474251492720207E-002) and hip (7.8474251477062731E-002) differ by less than 2E-4 (1.9952373087051e-10) +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.402526e+04 ) sec^-1 -OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.355239e+04 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x1_cudacpp > /tmp/avalassi/output_ggttg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 32/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8194s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8081s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttg_x10_cudacpp > /tmp/valassia/output_ggttg_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07997 [7.9971558174786780E-002] fbridge_mode=1 + [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 [UNWEIGHT] Wrote 2267 events (found 2272 events) - [COUNTERS] PROGRAM TOTAL : 1.6201s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0927s for 81920 events => throughput is 8.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0144s + [COUNTERS] PROGRAM TOTAL : 2.3053s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2776s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 81920 events => throughput is 3.31E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** 
(3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9971558171606449E-002) and hip (7.9971558174786780E-002) differ by less than 2E-4 (3.976818874207311e-11) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.379182e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.136542e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.862774e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.566641e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.640817e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.411150e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.619080e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.155971e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.599391e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.424302e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.190046e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.169194e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.571067e+06 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.426806e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.329072e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.634141e+06 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index 33c968e969..ab6656c8c9 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-04_12:00:06 +DATE: 2024-10-03_00:02:52 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 2.8086s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2938s - [COUNTERS] Fortran MEs ( 1 ) : 2.5148s for 8192 events => throughput is 3.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2925s + [COUNTERS] Fortran MEs ( 1 ) : 4.1585s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.7299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2214s - [COUNTERS] Fortran MEs ( 1 ) : 2.5085s for 8192 events => throughput is 3.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4534s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2814s + [COUNTERS] Fortran MEs ( 1 ) : 4.1719s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 26.5450s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3769s - [COUNTERS] Fortran MEs ( 1 ) : 25.1681s for 81920 events => throughput is 3.25E+03 events/s + [COUNTERS] PROGRAM TOTAL : 43.7199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9892s + [COUNTERS] Fortran MEs ( 1 ) : 41.7307s for 81920 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.3904s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2232s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1611s for 8192 events => throughput is 2.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 4.6017s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2867s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3065s for 8192 events => throughput is 1.90E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283632] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 33.1597s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3978s - [COUNTERS] CudaCpp MEs ( 2 ) : 31.7557s for 81920 events => throughput is 2.58E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 45.3130s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0098s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.2947s for 81920 events => throughput is 1.89E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283632) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.681013e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.952909e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.677051e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.958655e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926832] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.7580s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2223s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5326s for 8192 events => throughput is 5.35E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 2.5865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2840s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.2977s for 8192 events => throughput is 3.57E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926832) differ by less than 3E-14 (1.2212453270876722e-15) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283630] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 16.8796s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3725s - [COUNTERS] CudaCpp MEs ( 2 ) : 15.5040s for 81920 events => throughput is 5.28E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 25.1319s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9980s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.1295s for 81920 events => throughput is 3.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0045s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283630) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.482381e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.678244e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.503997e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.697932e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926854] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9114s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6844s for 8192 events => throughput is 1.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 1.2929s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2902s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0004s for 8192 events => throughput is 8.19E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849706926854) differ by less than 3E-14 (5.551115123125783e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283624] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 8.1097s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3682s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.7400s for 81920 events => throughput is 1.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 12.1225s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0172s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.1030s for 81920 events => throughput is 8.11E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930270975283624) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.246344e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.443138e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.418128e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.1699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2861s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8818s for 8192 events => throughput is 9.29E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 10.9025s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0005s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.9000s for 81920 events => throughput is 9.20E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.551865e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.251733e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.487120e+03 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4032s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2864s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1141s for 8192 events => throughput is 7.35E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 13.1691s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0018s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.1648s for 81920 events => throughput is 7.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.467628e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.444952e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926843] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7017s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4925s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1103s for 8192 events => throughput is 7.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0989s + [COUNTERS] PROGRAM TOTAL : 0.7927s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7197s 
+ [COUNTERS] CudaCpp MEs ( 2 ) : 0.0384s for 8192 events => throughput is 2.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0346s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33144849706926843) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283644] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.8477s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6834s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0651s for 81920 events => throughput is 7.69E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0992s + [COUNTERS] PROGRAM TOTAL : 2.7809s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4100s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3362s for 81920 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930270975283644) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.511967e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.147561e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.048767e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.353804e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.810313e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.122777e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.859104e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.172118e+05 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.808984e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.120194e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.631150e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.166091e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.811154e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125549e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.829336e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.430424e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index dc6ff47a1e..702a33cbc5 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make 
USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-04_12:03:36 +DATE: 2024-10-03_00:06:36 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 2.7355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2198s - [COUNTERS] Fortran MEs ( 1 ) : 2.5158s for 8192 events => throughput is 3.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4507s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2872s + [COUNTERS] Fortran MEs ( 1 ) : 4.1635s for 8192 events => throughput is 1.97E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.7838s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2710s - [COUNTERS] Fortran MEs ( 1 ) : 2.5128s for 8192 events => throughput is 3.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4557s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2815s + [COUNTERS] Fortran MEs ( 1 ) : 4.1743s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 26.5652s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4342s - [COUNTERS] Fortran MEs ( 1 ) : 25.1310s for 81920 events => throughput is 3.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 43.8607s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9951s + [COUNTERS] Fortran MEs ( 1 ) : 41.8656s for 81920 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145004642682091] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.2930s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2219s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.0651s for 8192 events => throughput is 2.67E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s + [COUNTERS] PROGRAM TOTAL : 4.4956s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2887s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.1989s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33145004642682091) differ by less than 4E-4 (4.6745046844431926e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930342252742398] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 32.1448s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 30.7669s for 81920 events => throughput is 2.66E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0060s + [COUNTERS] PROGRAM TOTAL : 43.9267s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9929s + [COUNTERS] CudaCpp MEs ( 2 ) : 41.9257s for 81920 events => throughput is 1.95E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930342252742398) differ by less than 4E-4 (3.405472335016313e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.754667e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.014568e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.746206e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.012026e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +205,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144996928807552] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9935s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2243s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7676s for 8192 events => throughput is 1.07E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 1.4417s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2863s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1531s for 8192 events => throughput is 7.10E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144996928807552) differ by less than 4E-4 (4.441772461838411e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +240,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930338466143997] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 9.1868s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4152s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.7699s for 81920 events => throughput is 1.05E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 13.6612s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0142s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.6444s for 81920 events => throughput is 7.04E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930338466143997) differ by less than 4E-4 (3.2245574101974483e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.096480e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.242904e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.097849e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.273553e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +285,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145003508801812] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.5705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2235s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3461s for 8192 events => throughput is 2.37E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 0.7933s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2859s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5062s for 8192 events => throughput is 1.62E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33145003508801812) differ by less than 4E-4 (4.6402948361556895e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +320,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930341333868943] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 4.8598s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3988s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.4602s for 81920 events => throughput is 2.37E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [COUNTERS] PROGRAM TOTAL : 7.0232s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9972s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.0248s for 81920 events => throughput is 1.63E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930341333868943) differ by less than 4E-4 (3.361570683813042e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.423170e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.674381e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.661626e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.7425s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2857s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4556s for 8192 events => throughput is 1.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 6.5552s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0045s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5495s for 81920 events => throughput is 1.80E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.845679e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.433338e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.886817e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8375s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5485s for 8192 events => throughput is 1.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 7.5027s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9956s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5057s for 81920 events => throughput is 1.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.507537e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.510473e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +525,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3315 [0.33145003134925582] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.6732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0721s for 8192 events => throughput is 1.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1090s + [COUNTERS] PROGRAM TOTAL : 0.7720s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7206s 
+ [COUNTERS] CudaCpp MEs ( 2 ) : 0.0269s for 8192 events => throughput is 3.05E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0245s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33145003134925582) differ by less than 4E-4 (4.629014765944461e-06) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +560,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930346901257960] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.4415s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6511s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6816s for 81920 events => throughput is 1.20E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1088s + [COUNTERS] PROGRAM TOTAL : 2.6799s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4230s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2326s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0244s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930346901257960) differ by less than 4E-4 (3.6275676709163207e-06) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.155724e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.113806e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.933893e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.387968e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.956222e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.095200e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.074175e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.214105e+05 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.958991e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.131792e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.277745e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.212764e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.955651e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.089022e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.769522e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.392733e+05 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 158ac94012..31826ff276 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-10-04_12:06:30 +DATE: 2024-10-03_00:09:34 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 2.7225s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2186s - [COUNTERS] Fortran MEs ( 1 ) : 2.5040s for 8192 events => throughput is 3.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4565s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2843s + [COUNTERS] Fortran MEs ( 1 ) : 4.1722s for 8192 events => throughput is 1.96E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x1_fortran > /tmp/valassia/output_ggttgg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/avalassi/output_ggttgg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849706926871] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 2.7588s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2519s - [COUNTERS] Fortran MEs ( 1 ) : 2.5070s for 8192 events => throughput is 3.27E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.4257s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2811s + [COUNTERS] Fortran MEs ( 1 ) : 4.1447s for 8192 events => throughput is 1.98E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttgg_x10_fortran > /tmp/valassia/output_ggttgg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/avalassi/output_ggttgg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930270975283627] fbridge_mode=0 + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 26.5426s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3923s - [COUNTERS] Fortran MEs ( 1 ) : 25.1503s for 81920 events => throughput is 3.26E+03 events/s + [COUNTERS] PROGRAM TOTAL : 43.7093s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9954s + [COUNTERS] Fortran MEs ( 1 ) : 41.7139s for 81920 events => throughput is 1.96E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849880304822] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 3.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2217s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.1699s for 8192 events => throughput is 2.58E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 4.7251s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2941s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4223s for 8192 events => throughput is 1.85E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849880304822) differ by less than 2E-4 (5.230916810816666e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271054111049] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 33.2111s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3776s - [COUNTERS] CudaCpp MEs ( 2 ) : 31.8273s for 81920 events => throughput is 2.57E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0062s + [COUNTERS] PROGRAM TOTAL : 45.7171s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9919s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.7167s for 81920 events => throughput is 1.87E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271054111049) differ by less than 2E-4 (3.766192246956734e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.680645e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.939321e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.679354e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.929194e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849797290254] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 1.7465s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5180s for 8192 events => throughput is 5.40E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 2.6038s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2841s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3150s for 8192 events => throughput is 3.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0046s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849797290254) differ by less than 2E-4 (2.7263173940639263e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271025983213] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 16.6740s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3763s - [COUNTERS] CudaCpp MEs ( 2 ) : 15.2946s for 81920 events => throughput is 5.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 25.0226s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9994s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.0184s for 81920 events => throughput is 3.56E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025983213) differ by less than 2E-4 (2.4223090200337083e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.552453e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.656422e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.571907e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.652891e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849773665513] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.9076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2256s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6805s for 8192 events => throughput is 1.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 1.2899s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2852s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0025s for 8192 events => throughput is 8.17E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and cpp (0.33144849773665513) differ by less than 2E-4 (2.013544886381169e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271025898603] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 8.2499s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4024s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.8460s for 81920 events => throughput is 1.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 12.0048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9956s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.0070s for 81920 events => throughput is 8.19E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and cpp (0.20930271025898603) differ by less than 2E-4 (2.418266698001048e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.235936e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.425311e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.431412e+03 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.1516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2832s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8664s for 8192 events => throughput is 9.46E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 10.7114s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9864s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.7229s for 81920 events => throughput is 9.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.568644e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.229570e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.554146e+03 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2915s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1258s for 8192 events => throughput is 7.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 104 + [XSECTION] ChannelId = 112 + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 13.2897s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9983s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.2889s for 81920 events => throughput is 7.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.423207e+03 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.188334e+03 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x1_cudacpp > /tmp/valassia/output_ggttgg_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x1_cudacpp > /tmp/avalassi/output_ggttgg_x1_cudacpp' DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.3314 [0.33144849679653593] fbridge_mode=1 + [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 [UNWEIGHT] Wrote 7 events (found 213 events) - [COUNTERS] PROGRAM TOTAL : 0.7028s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4930s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1107s for 8192 events => throughput is 7.40E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0991s + [COUNTERS] PROGRAM TOTAL : 0.7990s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7259s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0383s for 8192 events => throughput is 2.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.33144849706926871) and hip (0.33144849679653593) differ by less than 2E-4 (8.228511205743416e-10) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_ggttgg_x10_cudacpp > /tmp/valassia/output_ggttgg_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.2093 [0.20930271009954451] fbridge_mode=1 + [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 [UNWEIGHT] Wrote 17 events (found 331 events) - [COUNTERS] PROGRAM TOTAL : 2.8418s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6722s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0702s for 81920 events => throughput is 7.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0995s + [COUNTERS] PROGRAM TOTAL : 2.7965s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4257s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3361s for 81920 events => throughput is 2.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20930270975283627) and hip (0.20930271009954451) differ by less than 2E-4 (1.6564918325912004e-09) +OK! 
xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.499906e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.172471e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.007237e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.362761e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.803764e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.126051e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = 
SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.824759e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.165509e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.806219e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.125049e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.604334e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.168356e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.802602e+05 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.132671e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.820495e+04 ) sec^-1 +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.419294e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 5700ce5a9f..1c9ef17ccc 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. 
- +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + + +make USEBUILDDIR=1 BACKEND=cuda + +make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-10-04_12:11:04 +DATE: 2024-10-03_00:14:52 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.1538s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4743s - [COUNTERS] Fortran MEs ( 1 ) : 54.6795s for 8192 events => throughput is 1.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.3500s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5239s + [COUNTERS] Fortran MEs ( 1 ) : 100.8261s for 8192 events => throughput is 8.12E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3889s - [COUNTERS] Fortran MEs ( 1 ) : 54.7863s for 8192 events => throughput is 1.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.9221s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5152s + [COUNTERS] Fortran MEs ( 1 ) : 100.4069s for 8192 events => throughput is 8.16E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 552.2886s - [COUNTERS] Fortran Overhead ( 0 ) : 4.0331s - [COUNTERS] Fortran MEs ( 1 ) : 548.2555s for 81920 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 998.1100s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3849s + [COUNTERS] Fortran MEs ( 1 ) : 993.7252s for 81920 events => throughput is 8.24E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729949E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 86.6739s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4545s - [COUNTERS] CudaCpp MEs ( 2 ) : 86.0604s for 8192 events => throughput is 9.52E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1591s + [COUNTERS] PROGRAM TOTAL : 119.7848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5133s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.0752s for 8192 events => throughput is 6.88E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1962s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729949E-007) differ by less than 3E-14 (3.552713678800501e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -139,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 867.1055s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7996s - [COUNTERS] CudaCpp MEs ( 2 ) : 864.1713s for 81920 events => throughput is 9.48E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1346s + [COUNTERS] PROGRAM TOTAL : 1194.8842s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3319s + [COUNTERS] CudaCpp MEs ( 2 ) : 1190.3522s for 81920 events => throughput is 6.88E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2001s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.195599e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.974801e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.199200e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.902621e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -184,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729943E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 43.8186s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4341s - [COUNTERS] CudaCpp MEs ( 2 ) : 43.2592s for 8192 events => throughput is 1.89E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1253s + [COUNTERS] PROGRAM TOTAL : 62.0110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5249s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.3838s for 8192 events => throughput is 1.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1022s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729943E-007) differ by less than 3E-14 (3.3306690738754696e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -219,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333069E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 434.4773s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8275s - [COUNTERS] CudaCpp MEs ( 2 ) : 431.5815s for 81920 events => throughput is 1.90E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0683s + [COUNTERS] PROGRAM TOTAL : 616.2779s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3647s + [COUNTERS] CudaCpp MEs ( 2 ) : 611.8092s for 81920 events => throughput is 1.34E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1039s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333069E-007) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.297706e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.632598e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.357210e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.628468e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -264,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729933E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 20.1425s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4087s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.6505s for 8192 events => throughput is 4.17E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0833s + [COUNTERS] PROGRAM TOTAL : 28.8684s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5085s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.3140s for 8192 events => throughput is 2.89E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0459s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019835729933E-007) differ by less than 3E-14 (2.886579864025407e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -299,45 +319,309 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333072E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 200.9873s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7857s - [COUNTERS] CudaCpp MEs ( 2 ) : 198.1703s for 81920 events => throughput is 4.13E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0313s + [COUNTERS] PROGRAM TOTAL : 284.5568s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3064s + [COUNTERS] CudaCpp MEs ( 2 ) : 280.2035s for 81920 events => throughput is 2.92E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858527333072E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.148582e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.517015e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.538692e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 25.2889s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5134s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.7360s for 8192 events => throughput is 3.31E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 254.5108s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3262s + [COUNTERS] CudaCpp MEs ( 2 ) : 250.1446s for 81920 events => throughput is 3.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0399s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.062937e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.068720e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 24.8525s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5118s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.2957s for 8192 events => throughput is 3.37E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0449s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 250.4117s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3538s + [COUNTERS] CudaCpp MEs ( 2 ) : 246.0095s for 81920 events => throughput is 3.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0485s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.630906e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.204472e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.609231e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 3.2173s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0360s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1013s for 8192 events => throughput is 7.44E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0800s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 16.7881s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8408s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8652s for 81920 events => throughput is 7.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0822s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.474483e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.239436e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.257821e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.542937e+03 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.224358e+03 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.425016e+03 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.260076e+03 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.246009e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index b90b1d8d16..4235e6c48d 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. 
- +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + +make USEBUILDDIR=1 BACKEND=cuda + + + +make USEBUILDDIR=1 BACKEND=cppnone + +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-10-04_12:53:49 +DATE: 2024-10-03_01:30:56 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.1920s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3538s - [COUNTERS] Fortran MEs ( 1 ) : 54.8381s for 8192 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 101.4851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5203s + [COUNTERS] Fortran MEs ( 1 ) : 100.9648s for 8192 events => throughput is 8.11E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.1638s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3878s - [COUNTERS] Fortran MEs ( 1 ) : 54.7760s for 8192 events => throughput is 1.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.7472s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5235s + [COUNTERS] Fortran MEs ( 1 ) : 100.2237s for 8192 events => throughput is 8.17E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 552.3796s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8778s - [COUNTERS] Fortran MEs ( 1 ) : 549.5018s for 81920 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1009.1613s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4219s + [COUNTERS] Fortran MEs ( 1 ) : 1004.7394s for 81920 events => throughput is 8.15E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575308139230432E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 89.4764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4248s - [COUNTERS] CudaCpp MEs ( 2 ) : 88.8225s for 8192 events => throughput is 9.22E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2290s + [COUNTERS] PROGRAM TOTAL : 110.1880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5092s + [COUNTERS] CudaCpp MEs ( 2 ) : 109.4957s for 8192 events => throughput is 7.48E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1831s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575308139230432E-007) differ by less than 4E-4 (0.0001395002856556804) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -140,39 +160,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846099389242361E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 895.8954s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8367s - [COUNTERS] CudaCpp MEs ( 2 ) : 892.9121s for 81920 events => throughput is 9.17E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1466s + [COUNTERS] PROGRAM TOTAL : 1102.6591s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3176s + [COUNTERS] CudaCpp MEs ( 2 ) : 1098.1619s for 81920 events => throughput is 7.46E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1796s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846099389242361E-007) differ by less than 4E-4 (0.00014187637267237818) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.094534e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.906901e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.098895e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.884410e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -186,25 +206,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575303913232094E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 20.9041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4832s - [COUNTERS] CudaCpp MEs ( 2 ) : 20.3319s for 8192 events => throughput is 4.03E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0890s + [COUNTERS] PROGRAM TOTAL : 27.5604s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5117s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.0033s for 8192 events => throughput is 3.03E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0454s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575303913232094E-007) differ by less than 4E-4 (0.00013932100537483727) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -222,39 +242,39 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846096068245575E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 204.8498s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8168s - [COUNTERS] CudaCpp MEs ( 2 ) : 202.0002s for 81920 events => throughput is 4.06E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0328s + [COUNTERS] PROGRAM TOTAL : 271.4748s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3092s + [COUNTERS] CudaCpp MEs ( 2 ) : 267.1201s for 81920 events => throughput is 3.07E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0455s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846096068245575E-007) differ by less than 4E-4 (0.00014173098820635666) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.940133e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.509205e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.860175e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.514230e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -268,25 +288,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.358e-07 [2.3575304434295576E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 10.2208s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3848s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7269s for 8192 events => throughput is 8.42E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1090s + [COUNTERS] PROGRAM TOTAL : 14.2097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5091s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.6782s for 8192 events => throughput is 5.99E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0224s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3575304434295576E-007) differ by less than 4E-4 (0.0001393431105436438) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -304,46 +324,314 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.285e-07 [2.2846087407964351E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 101.2826s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8628s - [COUNTERS] CudaCpp MEs ( 2 ) : 98.4042s for 81920 events => throughput is 8.32E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0156s + [COUNTERS] PROGRAM TOTAL : 143.5363s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3592s + [COUNTERS] CudaCpp MEs ( 2 ) : 139.1540s for 81920 events => throughput is 5.89E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0230s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2846087407964351E-007) differ by less than 4E-4 (0.00014135186397323807) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.030804e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.841559e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.933769e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 12.8982s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5095s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.3688s for 8192 events => throughput is 6.62E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 130.1707s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3403s + [COUNTERS] CudaCpp MEs ( 2 ) : 125.8089s for 81920 events => throughput is 6.51E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0214s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.983770e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.944370e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 12.5708s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5217s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0269s for 8192 events => throughput is 6.81E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0222s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 124.0846s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3219s + [COUNTERS] CudaCpp MEs ( 2 ) : 119.7399s for 81920 events => throughput is 6.84E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0228s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.302945e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.047420e+03 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.303967e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.1905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5428s for 8192 events => throughput is 1.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5684s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 10.7124s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8151s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3406s for 81920 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5567s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.518595e+04 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.518521e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.124721e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.157002e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.133696e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.149769e+04 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.138034e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.016595e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 6e71297983..cd5c681c8c 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,21 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. - -make: Nothing to be done for 'all'. 
- +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg + +make USEBUILDDIR=1 BACKEND=cuda + + + +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppavx2 +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-10-04_13:30:16 +DATE: 2024-10-03_02:29:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -29,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.2559s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3532s - [COUNTERS] Fortran MEs ( 1 ) : 54.9027s for 8192 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.0620s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5189s + [COUNTERS] Fortran MEs ( 1 ) : 99.5431s for 8192 events => throughput is 8.23E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** 
-------------------- @@ -54,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x1_fortran > /tmp/valassia/output_ggttggg_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/avalassi/output_ggttggg_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019835729867E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 55.1771s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4120s - [COUNTERS] Fortran MEs ( 1 ) : 54.7651s for 8192 events => throughput is 1.50E+02 events/s + [COUNTERS] PROGRAM TOTAL : 100.3451s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5174s + [COUNTERS] Fortran MEs ( 1 ) : 99.8277s for 8192 events => throughput is 8.21E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -79,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_ggttggg_x10_fortran > /tmp/valassia/output_ggttggg_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/avalassi/output_ggttggg_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858527333038E-007] fbridge_mode=0 + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 551.6162s - [COUNTERS] Fortran Overhead ( 0 ) : 2.7869s - [COUNTERS] Fortran MEs ( 1 ) : 548.8293s for 81920 events => throughput is 1.49E+02 events/s + [COUNTERS] PROGRAM TOTAL : 1003.8857s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4277s + [COUNTERS] Fortran MEs ( 1 ) : 999.4580s for 81920 events => throughput is 8.20E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -104,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019963403161E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 86.7707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s - [COUNTERS] CudaCpp MEs ( 2 ) : 86.1409s for 8192 events => throughput is 9.51E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1952s + [COUNTERS] PROGRAM TOTAL : 123.2681s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5157s + [COUNTERS] CudaCpp MEs ( 2 ) : 122.5482s for 8192 events => throughput is 6.68E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2041s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019963403161E-007) differ by less than 2E-4 (5.416306958494488e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -139,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858650293213E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 868.4026s - [COUNTERS] Fortran Overhead ( 0 ) : 2.8203s - [COUNTERS] CudaCpp MEs ( 2 ) : 865.4484s for 81920 events => throughput is 9.47E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1339s + [COUNTERS] PROGRAM TOTAL : 1239.6410s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3289s + [COUNTERS] CudaCpp MEs ( 2 ) : 1235.1064s for 81920 events => throughput is 6.63E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2057s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858650293213E-007) differ by less than 2E-4 (5.3828717039294816e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.193941e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.864466e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.189969e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.890596e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -184,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019985761424E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 42.2548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3715s - [COUNTERS] CudaCpp MEs ( 2 ) : 41.8184s for 8192 events => throughput is 1.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0649s + [COUNTERS] PROGRAM TOTAL : 61.9882s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5115s + [COUNTERS] CudaCpp MEs ( 2 ) : 61.3746s for 8192 events => throughput is 1.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1021s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019985761424E-007) differ by less than 2E-4 (6.364815563486559e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -219,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858654239918E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 426.7406s - [COUNTERS] Fortran Overhead ( 0 ) : 3.8760s - [COUNTERS] CudaCpp MEs ( 2 ) : 422.8001s for 81920 events => throughput is 1.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0645s + [COUNTERS] PROGRAM TOTAL : 618.7847s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3324s + [COUNTERS] CudaCpp MEs ( 2 ) : 614.3530s for 81920 events => throughput is 1.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0993s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858654239918E-007) differ by less than 2E-4 (5.555647941690722e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.472663e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.600496e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.481727e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.598870e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -264,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x1_cudacpp > /tmp/valassia/output_ggttggg_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.357e-07 [2.3572019990398792E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.1693s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9111s - [COUNTERS] CudaCpp MEs ( 2 ) : 19.0002s for 8192 events => throughput is 4.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 4.2579s + [COUNTERS] PROGRAM TOTAL : 27.3953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5156s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.8357s for 8192 events => throughput is 3.05E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0440s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.3572019835729867E-007) and cpp (2.3572019990398792E-007) differ by less than 2E-4 (6.5615473054947415e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -299,45 +319,309 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_ggttggg_x10_cudacpp > /tmp/valassia/output_ggttggg_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.284e-07 [2.2842858652988808E-007] fbridge_mode=1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 [UNWEIGHT] Wrote 380 events (found 1707 events) - [COUNTERS] PROGRAM TOTAL : 193.2577s - [COUNTERS] Fortran Overhead ( 0 ) : 3.0250s - [COUNTERS] CudaCpp MEs ( 2 ) : 190.2033s for 81920 events => throughput is 4.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0294s + [COUNTERS] PROGRAM TOTAL : 270.5862s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3334s + [COUNTERS] CudaCpp MEs ( 2 ) : 266.2094s for 81920 events => throughput is 3.08E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0434s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2842858527333038E-007) and cpp (2.2842858652988808E-007) differ by less than 2E-4 (5.500877753306099e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.509905e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.729666e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.712586e+02 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 24.1058s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5083s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.5601s for 8192 events => throughput is 3.48E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0374s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 238.9805s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3052s + [COUNTERS] CudaCpp MEs ( 2 ) : 234.6373s for 81920 events => throughput is 3.49E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0380s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.313097e+02 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.298085e+02 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 24.6954s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5093s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.1424s for 8192 events => throughput is 3.39E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0437s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 245.9606s + [COUNTERS] Fortran Overhead ( 0 ) : 4.3057s + [COUNTERS] CudaCpp MEs ( 2 ) : 241.6115s for 81920 events => throughput is 3.39E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0435s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.675482e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.466414e+02 ) sec^-1 +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.688823e+02 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x1_cudacpp > /tmp/avalassi/output_ggttggg_x1_cudacpp' +DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 18 events (found 285 events) + [COUNTERS] PROGRAM TOTAL : 2.8142s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0560s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8779s for 8192 events => throughput is 9.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8804s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 128/128 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 14.3181s + [COUNTERS] Fortran Overhead ( 0 ) : 4.8117s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6324s for 81920 events => throughput is 9.49E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8741s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.385803e+03 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.083008e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.106276e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 512 32 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.157843e+04 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.105164e+04 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.108864e+04 ) sec^-1 -*** (2-512z) WARNING! 
SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.109773e+04 ) sec^-1 -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.669145e+03 ) sec^-1 -*** (3-hip) WARNING! SKIP MADEVENT_HIP (gg_ttggg is not supported on hip #933) *** +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 200d2a01cc..b69bdf2fc8 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-04_12:10:01 +DATE: 2024-10-03_00:13:18 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4368s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5125s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4425s + [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3329s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2862s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4614s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3919s + [COUNTERS] Fortran MEs ( 1 ) : 0.0695s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7102s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2439s - [COUNTERS] Fortran MEs ( 1 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8238s + [COUNTERS] Fortran MEs ( 1 ) : 0.6897s for 81920 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3440s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0557s for 8192 events => throughput is 1.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0753s for 8192 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575784] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.8088s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2508s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 81920 events => throughput is 1.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.5959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7507s for 81920 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575784) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.501528e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.104333e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.500779e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.103333e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456874] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3244s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2926s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0315s for 8192 events => throughput is 2.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4348s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3924s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0418s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456874) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5574s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3130s for 81920 events => throughput is 2.62E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.2944s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8727s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4211s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575781) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.595848e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.906811e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.601905e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.965411e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3104s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4283s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0238s for 8192 events => throughput is 3.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701704456871) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575775] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.4143s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2459s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1681s for 81920 events => throughput is 4.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0830s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8442s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 81920 events => throughput is 3.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376575775) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.032176e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.370259e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.080030e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.306992e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4190s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0219s for 8192 events => throughput is 3.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8793s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2209s for 81920 events => throughput is 3.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.662156e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.791971e+05 ) sec^-1 -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0104s 
+ [COUNTERS] PROGRAM TOTAL : 0.4278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3952s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0322s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and hip (0.20313701704456871) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +479,149 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5727s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0402s for 81920 events => throughput is 2.04E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0106s + [COUNTERS] PROGRAM TOTAL : 2.1576s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8414s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3157s for 81920 events => throughput is 2.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.479668e+05 ) sec^-1 -OK! 
xsec from fortran (0.21095771376575781) and hip (0.21095771376575781) differ by less than 3E-14 (0.0) +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.507584e+05 ) sec^-1 -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8445s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8402s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2812s + [COUNTERS] Fortran Overhead ( 0 ) : 2.2704s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 81920 events => throughput is 8.39E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.050073e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.052839e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.958312e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.425419e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.477111e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.341421e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.794526e+06 
) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.151138e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.464201e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.326674e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.794149e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.296661e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.430799e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.336891e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.197375e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.653723e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index f0273e55a1..ef9be9efc8 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: 
Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-04_12:10:22 +DATE: 2024-10-03_00:13:50 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.3658s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4359s + [COUNTERS] Fortran MEs ( 1 ) : 0.0679s for 8192 events => throughput is 1.21E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2893s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3955s + [COUNTERS] Fortran MEs ( 1 ) : 0.0691s for 8192 events => throughput is 1.19E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7102s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2435s - [COUNTERS] Fortran MEs ( 1 ) : 0.4667s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5081s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8204s + [COUNTERS] Fortran MEs ( 1 ) : 0.6877s for 81920 events => throughput is 1.19E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313702859087712] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3432s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0508s for 8192 events => throughput is 1.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3948s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313702859087712) differ by less than 4E-4 (5.6840001816382824e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095770771365008] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7678s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2609s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5066s for 81920 events => throughput is 1.62E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.5534s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8451s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7077s for 81920 events => throughput is 1.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095770771365008) differ by less than 4E-4 (2.86887245071199e-08) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.685173e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.157236e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.679429e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.172561e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313700465139972] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3138s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0205s for 8192 events => throughput is 4.00E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4213s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700465139972) differ by less than 4E-4 (6.100891492000216e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095768752291760] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5581s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3601s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1979s for 81920 events => throughput is 4.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.1080s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8479s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2597s for 81920 events => throughput is 3.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768752291760) differ by less than 4E-4 (1.2439858076973564e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.149490e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.049325e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.120908e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.028245e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313700354235445] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3221s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3116s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0104s for 8192 events => throughput is 7.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4062s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0139s for 8192 events => throughput is 5.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313700354235445) differ by less than 4E-4 (6.646850714275843e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095768538537163] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.4400s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3371s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1028s for 81920 events => throughput is 7.97E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9551s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1270s for 81920 events => throughput is 6.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095768538537163) differ by less than 4E-4 (1.3453116110007102e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.044738e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.240683e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.240258e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.282933e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4123s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4000s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 1.9645s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1178s for 81920 events => throughput is 6.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.681108e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.800809e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! 
Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4128s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.0059s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1573s for 81920 events => throughput is 5.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.857587e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.809270e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313702542257728] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.6091s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5921s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0055s for 8192 events => throughput is 1.49E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0116s + [COUNTERS] PROGRAM TOTAL : 0.8343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8305s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and hip (0.20313702542257728) differ by less than 4E-4 (4.1243140680435886e-08) +OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095770853284573] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.6569s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6329s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0132s for 81920 events => throughput is 6.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0109s + [COUNTERS] PROGRAM TOTAL : 2.2771s + 
[COUNTERS] Fortran Overhead ( 0 ) : 2.2677s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0086s for 81920 events => throughput is 9.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and hip (0.21095770853284573) differ by less than 4E-4 (2.48055024298921e-08) +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.483989e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.194095e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.415900e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.453243e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 
6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.291731e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.153983e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.272401e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.705356e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.300164e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.151283e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.331894e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.697710e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.155572e+07 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.773293e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.467689e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.223076e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 1f173fb3cf..eaa612a29b 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering 
directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-10-04_12:10:43 +DATE: 2024-10-03_00:14:20 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.3650s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3179s - [COUNTERS] Fortran MEs ( 1 ) : 0.0471s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4389s + [COUNTERS] Fortran MEs ( 1 ) : 0.0696s for 8192 events => throughput is 1.18E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x1_fortran > /tmp/valassia/output_gqttq_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/avalassi/output_gqttq_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701704456871] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3363s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2895s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4620s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3921s + [COUNTERS] Fortran MEs ( 1 ) : 0.0698s for 8192 events => throughput is 1.17E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_gqttq_x10_fortran > /tmp/valassia/output_gqttq_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/avalassi/output_gqttq_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376575781] fbridge_mode=0 + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.7140s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2460s - [COUNTERS] Fortran MEs ( 1 ) : 0.4680s for 81920 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.5215s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8261s + [COUNTERS] Fortran MEs ( 1 ) : 0.6954s for 81920 events => throughput is 1.18E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3484s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2922s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0558s for 8192 events => throughput is 1.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4810s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4047s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0757s for 8192 events => throughput is 1.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.8043s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2480s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5558s for 81920 events => throughput is 1.47E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 2.6118s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8599s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7512s for 81920 events => throughput is 1.09E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.486112e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.104505e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.508546e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.100300e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701694845307] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0318s for 8192 events => throughput is 2.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4404s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3990s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0409s for 8192 events => throughput is 2.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701694845307) differ by less than 2E-4 (4.731567360138911e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771376532396] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5619s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2439s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3177s for 81920 events => throughput is 2.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.2479s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4040s for 81920 events => throughput is 2.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771376532396) differ by less than 2E-4 (2.05657713081564e-12) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.594398e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.946818e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.583395e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.967726e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701710149187] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.3099s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0163s for 8192 events => throughput is 5.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4208s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and cpp (0.20313701710149187) differ by less than 2E-4 (2.8022051345999444e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771374576316] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2484s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1623s for 81920 events => throughput is 5.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0815s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8434s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2376s for 81920 events => throughput is 3.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and cpp (0.21095771374576316) differ by less than 2E-4 (9.478029472376193e-11) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.158200e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.431109e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.419345e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4156s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3944s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0208s for 8192 events => throughput is 3.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1079s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8862s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2213s for 81920 events => throughput is 3.70E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.868620e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.176426e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.922480e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' +DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4329s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3984s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0341s for 8192 events => throughput is 2.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1810s + [COUNTERS] Fortran Overhead ( 0 ) : 1.8478s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3327s for 81920 events => throughput is 2.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.411172e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.413465e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x1_cudacpp > /tmp/valassia/output_gqttq_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x1_cudacpp > /tmp/avalassi/output_gqttq_x1_cudacpp' DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2031 [0.20313701710728185] fbridge_mode=1 + [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 [UNWEIGHT] Wrote 499 events (found 1502 events) - [COUNTERS] PROGRAM TOTAL : 0.5973s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5787s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0109s + [COUNTERS] PROGRAM TOTAL : 0.8355s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.20313701704456871) and hip (0.20313701710728185) differ by less than 2E-4 (3.087232691711961e-10) +OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_gqttq_x10_cudacpp > /tmp/valassia/output_gqttq_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 - [NGOODHEL] ngoodhel/ncomb = 32/32 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.211 [0.21095771372611694] fbridge_mode=1 + [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 [UNWEIGHT] Wrote 2259 events (found 2264 events) - [COUNTERS] PROGRAM TOTAL : 1.5742s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5233s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0402s for 81920 events => throughput is 2.04E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0107s + [COUNTERS] PROGRAM TOTAL : 2.2766s + 
[COUNTERS] Fortran Overhead ( 0 ) : 2.2655s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.22E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21095771376575781) and hip (0.21095771372611694) differ by less than 2E-4 (1.8790913269839393e-10) +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.055221e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.015948e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.924559e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.328513e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 
6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.490650e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.335551e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.821752e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.198409e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.537152e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.343564e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.812009e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.282279e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow 
summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.450255e+06 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.337961e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SM_GU_TTXU_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.274935e+05 ) sec^-1 +Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.656673e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 46f4c2db0c..a6c1729b94 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering 
directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:13:12 +DATE: 2024-10-03_03:45:28 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.3108s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2785s - [COUNTERS] Fortran MEs ( 1 ) : 0.0323s for 8192 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8948s + [COUNTERS] Fortran MEs ( 1 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8489s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4425s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3963s + [COUNTERS] Fortran MEs ( 1 ) : 0.0462s for 8192 events => throughput is 1.77E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.6334s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3116s - [COUNTERS] Fortran MEs ( 1 ) : 0.3217s for 81920 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9883s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5220s + [COUNTERS] Fortran MEs ( 1 ) : 0.4663s for 81920 events => throughput is 1.76E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755334] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.9361s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0356s for 8192 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4465s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3954s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0506s for 8192 events => throughput is 1.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755334) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755170) differ by less than 3E-14 (6.661338147750939e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865325] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3838s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3561s for 81920 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 2.0112s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5194s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4914s for 81920 events => throughput is 1.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865325) differ by less than 3E-14 (1.1102230246251565e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.303016e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.689411e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.356917e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.699143e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755347] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.7265s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7055s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0207s for 8192 events => throughput is 3.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4240s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0267s for 8192 events => throughput is 3.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755347) differ by less than 3E-14 (8.881784197001252e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755183) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865338] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.5576s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3402s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2172s for 81920 events => throughput is 3.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7904s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2664s for 81920 events => throughput is 3.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865338) differ by less than 3E-14 (1.0436096431476471e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.905511e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.025435e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.063650e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.992417e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755325] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6423s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6308s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0113s for 8192 events => throughput is 7.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4119s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3956s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0159s for 8192 events => throughput is 5.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081479755325) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.3083s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1955s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1126s for 81920 events => throughput is 7.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6859s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5201s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1654s for 81920 events => throughput is 4.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713375865552) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.441710e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.043958e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.982138e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.51E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755165) differ by less than 3E-14 (8.881784197001252e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.6925s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5412s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1509s for 81920 events => throughput is 5.43E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.422693e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.483571e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.463064e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081479755179) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7497s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5314s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2179s for 81920 events => throughput is 3.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.594328e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.651571e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755356] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.9220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9070s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s + [COUNTERS] PROGRAM TOTAL : 0.8424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8384s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.55E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and hip (2.0160081479755356) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081479755192) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865352] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.4975s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4693s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0185s for 81920 events => throughput is 4.43E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0098s + [COUNTERS] PROGRAM TOTAL : 1.9702s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and hip (2.0336713375865352) differ by less than 3E-14 (9.880984919163893e-15) +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.431899e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.955075e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.357175e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.400755e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.490479e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.826601e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.228740e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.117685e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.486032e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.829763e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.607853e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.475228e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.477472e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.836271e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529467e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.541450e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index fb2002923f..ab10ba65ee 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:13:51 +DATE: 2024-10-03_03:45:56 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9546s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9225s - [COUNTERS] Fortran MEs ( 1 ) : 0.0321s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8867s + [COUNTERS] Fortran MEs ( 1 ) : 0.0464s for 8192 events => throughput is 1.77E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6395s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6073s - [COUNTERS] Fortran MEs ( 1 ) : 0.0322s for 8192 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4488s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4019s + [COUNTERS] Fortran MEs ( 1 ) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.4604s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1387s - [COUNTERS] Fortran MEs ( 1 ) : 0.3217s for 81920 events => throughput is 2.55E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9841s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5204s + [COUNTERS] Fortran MEs ( 1 ) : 0.4638s for 81920 events => throughput is 1.77E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,34 +124,34 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160406546722180] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.6404s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6085s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.58E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4050s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0466s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160406546722180) differ by less than 4E-4 (1.61242883456314e-05) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ by less than 4E-4 (1.6138103811513815e-05) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! 
-diff /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 -< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499550E+02 0.71320499550E+02 0.00000000000E+00 0. 1. -< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239731E+02 0.54771239731E+02 0.00000000000E+00 0. 1. -< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002962E+02 0.63925016178E+02 0.47000000000E+01 0. -1. -< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762568567E+01 0.62166723103E+02 0.47000000000E+01 0. -1. +< 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. +< 21 -1 0 0 502 503 -0.00000000000E+00 -0.00000000000E+00 -0.54771239790E+02 0.54771239790E+02 0.00000000000E+00 0. 1. +< 5 1 1 2 501 0 0.50303102232E+02 0.36190119942E+02 0.14973002893E+02 0.63925016162E+02 0.47000000000E+01 0. -1. +< -5 1 1 2 0 501 -0.50303102232E+02 -0.36190119942E+02 0.15762567893E+01 0.62166723101E+02 0.47000000000E+01 0. -1. 
< < 0 0.12500099E+03 < 0 diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 4d77d149f7..f07c5f8fb7 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx + +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=hip -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' - -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering 
directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:14:00 +DATE: 2024-10-03_03:46:02 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 1.1257s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0933s - [COUNTERS] Fortran MEs ( 1 ) : 0.0324s for 8192 events => throughput is 2.53E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9413s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8957s + [COUNTERS] Fortran MEs ( 1 ) : 0.0456s for 8192 events => throughput is 1.79E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x1_fortran > /tmp/valassia/output_heftggbb_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/avalassi/output_heftggbb_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081479755330] fbridge_mode=0 + [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6437s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6116s - [COUNTERS] Fortran MEs ( 1 ) : 0.0320s for 8192 events => throughput is 2.56E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4467s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4006s + [COUNTERS] Fortran MEs ( 1 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_heftggbb_x10_fortran > /tmp/valassia/output_heftggbb_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp/avalassi/output_heftggbb_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713375865552] fbridge_mode=0 + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.4523s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1304s - [COUNTERS] Fortran MEs ( 1 ) : 0.3220s for 81920 events => throughput is 2.54E+05 events/s + [COUNTERS] PROGRAM TOTAL : 2.0497s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5663s + [COUNTERS] Fortran MEs ( 1 ) : 0.4834s for 81920 events => throughput is 1.69E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,25 +124,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453460] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6429s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0357s for 8192 events => throughput is 2.29E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [COUNTERS] PROGRAM TOTAL : 0.4441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3939s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 8192 events => throughput is 1.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453460) differ by less than 2E-4 (2.4042468904639236e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453331) differ by less than 2E-4 (2.4042469792817656e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,25 +160,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200616] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.4982s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1423s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3556s for 81920 events => throughput is 2.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 2.0264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5298s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4961s for 81920 events => throughput is 1.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200616) differ by less than 2E-4 (2.297987178323524e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -187,15 +187,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.260726e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.571027e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.291412e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.590282e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -209,25 +209,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081964453469] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6779s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6568s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0209s for 8192 events => throughput is 3.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3968s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081964453469) differ by less than 2E-4 (2.4042469348728446e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081964453336) differ by less than 2E-4 (2.404247001486226e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -245,25 +245,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713843200620] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.3653s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1576s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2075s for 81920 events => throughput is 3.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7845s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5165s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2676s for 81920 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713843200620) differ by less than 2E-4 (2.2979872005279844e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -272,15 +272,15 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.881285e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.828390e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.024699e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.883903e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,25 +294,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081962974865] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.6276s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and cpp (2.0160081962974865) differ by less than 2E-4 (2.3969126017320264e-08) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -330,25 +330,25 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713836598834] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.2440s - [COUNTERS] Fortran Overhead ( 0 ) : 4.1333s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1106s for 81920 events => throughput is 7.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6964s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1667s for 81920 events => throughput is 4.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0336713375865552) and cpp (2.0336713836598834) differ by less than 2E-4 (2.2655247899905362e-08) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -357,23 +357,102 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.263617e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.810097e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.697282e+05 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.776953e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.4132s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3979s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962974745) differ by less than 2E-4 (2.3969127349587893e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. 
use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.6608s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5101s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1503s for 81920 events => throughput is 5.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.113673e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.135155e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -385,30 +464,31 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x1_cudacpp > /tmp/valassia/output_heftggbb_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.016 [2.0160081483021464] fbridge_mode=1 + [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.9033s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8885s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0060s for 8192 events => throughput is 1.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s + [COUNTERS] PROGRAM TOTAL : 0.4199s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3970s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0225s for 8192 events => throughput is 3.64E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0160081479755330) and hip (2.0160081483021464) differ by less than 2E-4 (1.6200996100224074e-10) +OK! xsec from fortran (2.0160081479755183) and cpp (2.0160081962970020) differ by less than 2E-4 (2.3968893092529697e-08) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -420,67 +500,153 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_heftggbb_x10_cudacpp > /tmp/valassia/output_heftggbb_x10_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0336713380111582] fbridge_mode=1 + [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 [UNWEIGHT] Wrote 1707 events (found 1712 events) - [COUNTERS] PROGRAM TOTAL : 4.5065s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4788s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 81920 events => throughput is 4.40E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s + [COUNTERS] PROGRAM TOTAL : 1.7604s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5294s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2307s for 81920 events => throughput is 3.55E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.0336713375865552) and hip (2.0336713380111582) differ by less than 2E-4 (2.0878654360956261e-10) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.151070e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.343164e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x1_cudacpp > /tmp/avalassi/output_heftggbb_x1_cudacpp' +DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 + [UNWEIGHT] Wrote 1652 events (found 1657 events) + [COUNTERS] PROGRAM TOTAL : 0.8378s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0160081479755183) and cuda (2.0160081483021330) differ by less than 2E-4 (1.6201062713605552e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.9761s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9663s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.436985e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.928935e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.357929e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.339519e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.491674e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.817995e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.313248e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.148245e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.496904e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.818249e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.622089e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.450546e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.483481e+07 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.807173e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.597049e+06 ) sec^-1 +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.482355e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index cd23937ee4..892b3fd5e1 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:16:23 +DATE: 2024-10-03_03:49:14 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.8274s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3917s - [COUNTERS] Fortran MEs ( 1 ) : 1.4357s for 8192 events => throughput is 5.71E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3507s + [COUNTERS] Fortran MEs ( 1 ) : 2.2283s for 8192 events => throughput is 3.68E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2615s - [COUNTERS] Fortran MEs ( 1 ) : 1.4138s for 8192 events => throughput is 5.79E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5936s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s + [COUNTERS] Fortran MEs ( 1 ) : 2.2383s for 8192 events => throughput is 3.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 15.7401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4278s - [COUNTERS] Fortran MEs ( 1 ) : 14.3123s for 81920 events => throughput is 5.72E+03 events/s + [COUNTERS] PROGRAM TOTAL : 24.3811s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0276s + [COUNTERS] Fortran MEs ( 1 ) : 22.3535s for 81920 events => throughput is 3.66E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728557E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.9585s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.6618s for 8192 events => throughput is 4.93E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0033s + [COUNTERS] PROGRAM TOTAL : 2.7630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3551s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4030s for 8192 events => throughput is 3.41E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728557E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898222E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 19.0374s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4244s - [COUNTERS] CudaCpp MEs ( 2 ) : 17.6097s for 81920 events => throughput is 4.65E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0033s + [COUNTERS] PROGRAM TOTAL : 26.1710s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0362s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.1298s for 81920 events => throughput is 3.39E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898222E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.543929e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.559366e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.559679e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.558371e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.3005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3388s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9597s for 8192 events => throughput is 8.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 1.6265s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3630s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2607s for 8192 events => throughput is 6.50E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728536E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898275E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 11.0954s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5152s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5781s for 81920 events => throughput is 8.55E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 14.5841s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0288s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5525s for 81920 events => throughput is 6.53E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898275E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.885187e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.776735e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.823104e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.761919e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728525E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7435s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2974s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4451s for 8192 events => throughput is 1.84E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.9120s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3562s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5544s for 8192 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381610362728525E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898233E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.0265s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5817s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4437s for 81920 events => throughput is 1.84E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.5522s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0125s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5382s for 81920 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542926582898233E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.954606e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.525780e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.523425e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3590s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5061s for 8192 events => throughput is 1.62E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 6.9898s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0215s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9669s for 81920 events => throughput is 1.65E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.714953e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.045230e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.727026e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.9876s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3520s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6339s for 8192 events => throughput is 1.29E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 8.3820s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0136s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.3668s for 81920 events => throughput is 1.29E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.309804e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.309629e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728514E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8018s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6803s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0647s for 8192 events => throughput is 1.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0569s + 
[COUNTERS] PROGRAM TOTAL : 0.8331s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7937s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 8192 events => throughput is 4.16E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0197s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610362728514E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.2955s - [COUNTERS] Fortran Overhead ( 0 ) : 1.6338s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6060s for 81920 events => throughput is 1.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0557s + [COUNTERS] PROGRAM TOTAL : 2.6470s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1762s for 81920 events => throughput is 4.65E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0196s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926582898244E-007) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.285923e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.238235e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.807814e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.533678e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.813672e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.854781e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.210704e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.206482e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.821036e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.790740e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.262482e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.229997e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.814985e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.764026e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.225752e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.687249e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index a6801e5689..1da536828f 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 
BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:18:29 +DATE: 2024-10-03_03:51:30 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.7715s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2563s - [COUNTERS] Fortran MEs ( 1 ) : 1.5152s for 8192 events => throughput is 5.41E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5732s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3484s + [COUNTERS] Fortran MEs ( 1 ) : 2.2248s for 8192 events => throughput is 3.68E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6636s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2590s - [COUNTERS] Fortran MEs ( 1 ) : 1.4046s for 8192 events => throughput is 5.83E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5858s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3527s + [COUNTERS] Fortran MEs ( 1 ) : 2.2331s for 8192 events => throughput is 3.67E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 15.2868s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3433s - [COUNTERS] Fortran MEs ( 1 ) : 13.9435s for 81920 events => throughput is 5.88E+03 events/s + [COUNTERS] PROGRAM TOTAL : 24.3640s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0153s + [COUNTERS] Fortran MEs ( 1 ) : 22.3487s for 81920 events => throughput is 3.67E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381684214474469E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.8552s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2751s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.5760s for 8192 events => throughput is 5.20E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0040s + [COUNTERS] PROGRAM TOTAL : 2.7241s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3585s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3605s for 8192 events => throughput is 3.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381684214474469E-007) differ by less than 4E-4 (9.668786189465095e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542976447681378E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 18.4227s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4162s - [COUNTERS] CudaCpp MEs ( 2 ) : 17.0033s for 81920 events => throughput is 4.82E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [COUNTERS] PROGRAM TOTAL : 25.6088s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0243s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.5796s for 81920 events => throughput is 3.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542976447681378E-007) differ by less than 4E-4 (6.514616746056134e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.678196e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.595330e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.691049e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.592962e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381673102586798E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8144s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5007s for 8192 events => throughput is 1.64E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s + [COUNTERS] PROGRAM TOTAL : 1.0090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3576s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6499s for 8192 events => throughput is 1.26E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381673102586798E-007) differ by less than 4E-4 (8.214000459805249e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542965612263376E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 6.4975s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.9690s for 81920 events => throughput is 1.65E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 8.5774s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0289s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.5470s for 81920 events => throughput is 1.25E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542965612263376E-007) differ by less than 4E-4 (5.09901657563816e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.691506e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.276959e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.683782e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.272430e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381674937970992E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.5370s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3041s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2322s for 8192 events => throughput is 3.53E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.6541s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3627s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2905s for 8192 events => throughput is 2.82E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381674937970992E-007) differ by less than 4E-4 (8.454291831050398e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542993199513089E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 3.8381s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5389s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.2986s for 81920 events => throughput is 3.56E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 4.8460s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0109s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.8342s for 81920 events => throughput is 2.89E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542993199513089E-007) differ by less than 4E-4 (8.703170601975785e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.666190e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.994182e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.988531e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6112s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3544s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2560s for 8192 events => throughput is 3.20E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 4.5717s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0031s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5679s for 81920 events => throughput is 3.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.282515e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.650647e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.307160e+04 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.6848s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3570s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3268s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 5.2685s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0219s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.2456s for 81920 events => throughput is 2.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.530666e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.550885e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381687553340853E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7076s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6167s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0365s for 8192 events => throughput is 2.24E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0544s + [COUNTERS] PROGRAM TOTAL : 0.8332s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7964s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 8192 events => throughput is 4.18E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381687553340853E-007) differ by less than 4E-4 (1.0105915801972287e-06) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6543007309341497E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.3731s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9820s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3363s for 81920 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0547s + [COUNTERS] PROGRAM TOTAL : 2.6217s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1593s for 81920 events => throughput is 5.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0171s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6542926582898244E-007) and hip (7.6543007309341497E-007) differ by less than 4E-4 (1.0546558233404113e-06) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.332012e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.242479e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.661724e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.443260e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.665894e+05 ) sec^-1 
+Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.299498e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.497446e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.323299e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.664462e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.300630e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.326834e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.333556e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.632827e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.292961e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.430627e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.657294e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index de2ab0c200..bec5746083 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory 
'/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:20:09 +DATE: 2024-10-03_03:53:23 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 1.8730s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2870s - [COUNTERS] Fortran MEs ( 1 ) : 1.5860s for 8192 events => throughput is 5.17E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5908s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3522s + [COUNTERS] Fortran MEs ( 1 ) : 2.2386s for 8192 events => throughput is 3.66E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x1_fortran > /tmp/valassia/output_smeftggtttt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /tmp/avalassi/output_smeftggtttt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610362728536E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.8889s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2903s - [COUNTERS] Fortran MEs ( 1 ) : 1.5986s for 8192 events => throughput is 5.12E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.5989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3536s + [COUNTERS] Fortran MEs ( 1 ) : 2.2453s for 8192 events => throughput is 3.65E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_smeftggtttt_x10_fortran > /tmp/valassia/output_smeftggtttt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > /tmp/avalassi/output_smeftggtttt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926582898244E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 17.5690s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5059s - [COUNTERS] Fortran MEs ( 1 ) : 16.0631s for 81920 events => throughput is 5.10E+03 events/s + [COUNTERS] PROGRAM TOTAL : 24.4959s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0436s + [COUNTERS] Fortran MEs ( 1 ) : 22.4523s for 81920 events => throughput is 3.65E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608764955570E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.1883s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3189s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.8656s for 8192 events => throughput is 4.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s + [COUNTERS] PROGRAM TOTAL : 2.7880s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3539s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4293s for 8192 events => throughput is 3.37E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608764955570E-007) differ by less than 2E-4 (2.0918293763827478e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925018181723E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 20.1819s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5129s - [COUNTERS] CudaCpp MEs ( 2 ) : 18.6654s for 81920 events => throughput is 4.39E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0037s + [COUNTERS] PROGRAM TOTAL : 26.4223s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0309s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.3863s for 81920 events => throughput is 3.36E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925018181723E-007) differ by less than 2E-4 (2.0442339820903044e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.595880e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.446996e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.584557e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.474680e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608686521537E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.2548s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3190s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9338s for 8192 events => throughput is 8.77E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 1.6449s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2736s for 8192 events => throughput is 6.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608686521537E-007) differ by less than 2E-4 (2.194516446341055e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542924921991233E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 11.0387s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5040s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.5327s for 81920 events => throughput is 8.59E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 14.5911s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0528s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.5358s for 81920 events => throughput is 6.53E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542924921991233E-007) differ by less than 2E-4 (2.1699026797605825e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.975960e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.890337e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.924543e+03 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.047724e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381608826200382E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7407s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2994s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4394s for 8192 events => throughput is 1.86E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [COUNTERS] PROGRAM TOTAL : 0.9065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3553s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5497s for 8192 events => throughput is 1.49E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and cpp (7.6381608826200382E-007) differ by less than 2E-4 (2.0116467158715068e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542925056010384E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 5.9216s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5372s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3833s for 81920 events => throughput is 1.87E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 7.5428s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0133s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.5280s for 81920 events => throughput is 1.48E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and cpp (7.6542925056010384E-007) differ by less than 2E-4 (1.9948124929669575e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.918930e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.522237e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.532222e+04 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.8421s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4860s for 8192 events => throughput is 1.69E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 6.9661s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0341s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.9307s for 81920 events => throughput is 1.66E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.729032e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.920051e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.749814e+04 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 230 events (found 851 events) + [COUNTERS] PROGRAM TOTAL : 0.9989s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3556s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6416s for 8192 events => throughput is 1.28E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 64/64 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 8.5360s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0345s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.4998s for 81920 events => throughput is 1.26E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.215280e+04 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.243322e+04 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x1_cudacpp > /tmp/valassia/output_smeftggtttt_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x1_cudacpp > /tmp/avalassi/output_smeftggtttt_x1_cudacpp' DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.638e-07 [7.6381610372590265E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6731s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0643s for 8192 events => throughput is 1.27E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0582s + 
[COUNTERS] PROGRAM TOTAL : 0.8391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7995s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6381610362728536E-007) and hip (7.6381610372590265E-007) differ by less than 2E-4 (1.2911138824733825e-10) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_smeftggtttt_x10_cudacpp > /tmp/valassia/output_smeftggtttt_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.654e-07 [7.6542926581386322E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 [UNWEIGHT] Wrote 1679 events (found 1684 events) - [COUNTERS] PROGRAM TOTAL : 2.5113s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8511s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6018s for 81920 events => throughput is 1.36E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0583s + [COUNTERS] PROGRAM TOTAL : 2.6398s + [COUNTERS] Fortran Overhead ( 0 ) : 2.4432s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1768s for 81920 events => throughput is 4.63E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6542926582898244E-007) and hip (7.6542926581386322E-007) differ by less than 2E-4 (1.9752643964920935e-11) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.288285e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.207682e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.774779e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.525707e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.826375e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.691636e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.219655e+05 ) sec^-1 +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.175385e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.826503e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.807412e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.240808e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.198574e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.834278e+05 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 8.764129e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.256536e+04 ) sec^-1 +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.676928e+05 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index deec2c77b7..60dc72a754 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make USEBUILDDIR=1 BACKEND=cppsse4 -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 make 
USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-04_14:15:33 +DATE: 2024-10-03_03:47:55 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.5953s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5894s - [COUNTERS] Fortran MEs ( 1 ) : 0.0059s for 8192 events => throughput is 1.40E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6671s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6586s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3065s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.35E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4144s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s + [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.79E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1278s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0728s - [COUNTERS] Fortran MEs ( 1 ) : 0.0550s for 81920 events => throughput is 1.49E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6469s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5651s + [COUNTERS] Fortran MEs ( 1 ) : 0.0818s for 81920 events => throughput is 1.00E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,9 +124,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3345s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3276s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0067s for 8192 events => throughput is 1.23E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4141s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1437s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0774s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0661s for 81920 events => throughput is 1.24E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5734s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.260666e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.009926e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.270880e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.018079e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,9 +204,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3163s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.52E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4184s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.78E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1171s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0324s for 81920 events => throughput is 2.53E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6135s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5696s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 81920 events => throughput is 1.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207288) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.596609e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.910107e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.878355e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.994596e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3211s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3191s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.34E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4110s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4079s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310722207294] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1406s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1209s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0196s for 81920 events => throughput is 4.19E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.5890s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5608s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 81920 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207294) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.731833e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.069685e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.335637e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4090s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.5957s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5688s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0265s for 81920 events => throughput is 3.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.242302e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.966724e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.339112e+06 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4182s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6593s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6273s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 81920 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.878268e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.129733e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6393s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6268s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.59E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s + [COUNTERS] PROGRAM TOTAL : 0.8486s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.8450s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449452343426120) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.4474s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 81920 events => throughput is 9.93E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [COUNTERS] PROGRAM TOTAL : 2.0157s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0075s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.585053e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.231093e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.572497e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.601013e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.485723e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.487661e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.923782e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.923690e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.300903e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.473112e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.447522e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.866909e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.419170e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.525381e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.906699e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.225466e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 50a82667f2..40e043e263 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' 
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-04_14:15:50 +DATE: 2024-10-03_03:48:21 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.4774s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4720s - [COUNTERS] Fortran MEs ( 1 ) : 0.0054s for 8192 events => throughput is 1.53E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6695s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6613s + [COUNTERS] Fortran MEs ( 1 ) : 0.0082s for 8192 events => throughput is 1.00E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3169s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3108s - [COUNTERS] Fortran MEs ( 1 ) : 0.0061s for 8192 events => throughput is 1.34E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4028s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.03E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1755s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1195s - [COUNTERS] Fortran MEs ( 1 ) : 0.0560s for 81920 events => throughput is 1.46E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6449s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5625s + [COUNTERS] Fortran MEs ( 1 ) : 0.0824s for 81920 events => throughput is 9.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446601800423] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3278s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0064s for 8192 events => throughput is 1.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4150s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4064s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446601800423) differ by less than 4E-4 (1.8856252759213987e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305123565710] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1684s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1100s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0582s for 81920 events => throughput is 1.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6513s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5705s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 81920 events => throughput is 1.02E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305123565710) differ by less than 4E-4 (1.8208556928911435e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.375769e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.019290e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.585958e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.014848e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446481959741] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3539s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0021s for 8192 events => throughput is 3.86E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4189s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4158s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446481959741) differ by less than 4E-4 (1.924982528933583e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305120129920] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1309s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1120s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0187s for 81920 events => throughput is 4.37E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6073s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5801s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 81920 events => throughput is 3.04E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305120129920) differ by less than 4E-4 (1.8219731212631984e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.723661e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.109785e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.560242e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.217004e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446707997274] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3433s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0014s for 8192 events => throughput is 5.77E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4105s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446707997274) differ by less than 4E-4 (1.8507488352970114e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747305200358782] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1239s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1108s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 81920 events => throughput is 6.30E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.5830s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5644s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0184s for 81920 events => throughput is 4.46E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305200358782) differ by less than 4E-4 (1.7958801523665358e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.778488e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.670603e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.314284e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4168s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4147s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.5952s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5774s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0176s for 81920 events => throughput is 4.65E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.288976e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.101338e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.607414e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4090s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4065s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.81E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6041s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5837s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0201s for 81920 events => throughput is 4.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07) -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.367008e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.649645e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449446257236112] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6004s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5879s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0051s for 8192 events => throughput is 1.61E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0074s + [COUNTERS] PROGRAM TOTAL : 0.8469s + [COUNTERS] Fortran 
Overhead ( 0 ) : 0.8433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449446257236112) differ by less than 4E-4 (1.998784719958735e-07) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747304644712603] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.5346s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5164s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0100s for 81920 events => throughput is 8.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0081s + [COUNTERS] PROGRAM TOTAL : 2.0191s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747304644712603) differ by less than 4E-4 (1.9765939007765354e-07) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.740887e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.218779e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.697485e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.617092e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.603233e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.685309e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.026789e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.178696e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.675123e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.647881e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.065938e+08 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.181500e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.798785e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.209271e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.393472e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.664226e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 4928c87d09..b038a0f2b5 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-10-04_14:16:07 +DATE: 2024-10-03_03:48:47 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,8 +49,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.4648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4594s - [COUNTERS] Fortran MEs ( 1 ) : 0.0054s for 8192 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.6842s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6761s + [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,8 +74,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tmp/valassia/output_susyggt1t1_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tmp/avalassi/output_susyggt1t1_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x1_fortran > /tm [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3020s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2966s - [COUNTERS] Fortran MEs ( 1 ) : 0.0053s for 8192 events => throughput is 1.54E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.4107s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4027s + [COUNTERS] Fortran MEs ( 1 ) : 0.0080s for 8192 events => throughput is 1.02E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,8 +99,8 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /tmp/valassia/output_susyggt1t1_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /tmp/avalassi/output_susyggt1t1_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/valassia/input_susyggt1t1_x10_fortran > /t [XSECTION] ChannelId = 3 [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1192s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0653s - [COUNTERS] Fortran MEs ( 1 ) : 0.0539s for 81920 events => throughput is 1.52E+06 events/s + [COUNTERS] PROGRAM TOTAL : 1.6320s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5508s + [COUNTERS] Fortran MEs ( 1 ) : 0.0812s for 81920 events => throughput is 1.01E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3183s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3115s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0066s for 8192 events => throughput is 1.23E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4181s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4096s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0801s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0670s for 81920 events => throughput is 1.22E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6484s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5654s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0827s for 81920 events => throughput is 9.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.238434e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.742532e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.323227e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.900727e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453160892020] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3161s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.61E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892020) differ by less than 2E-4 (2.6846653566892087e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311535940242] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.1111s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0310s for 81920 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6172s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5740s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 81920 events => throughput is 1.91E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940242) differ by less than 2E-4 (2.6465174718381945e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.904271e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.907045e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.962408e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.053191e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449453251780906] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.3194s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4099s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4068s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453251780906) differ by less than 2E-4 (2.98315638858071e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,200 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747311628550072] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.0984s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0797s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 81920 events => throughput is 4.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.5959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5685s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311628550072) differ by less than 2E-4 (2.947714006218405e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.086035e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.250656e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.571390e+06 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4139s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4111s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.5862s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5600s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0259s for 81920 events => throughput is 3.17E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.389797e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.298072e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.566056e+06 ) sec^-1 -*** (2-512y) WARNING! 
SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' +DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4175s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! 
xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 4/4 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 2 + [XSECTION] ChannelId = 3 + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6022s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5735s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 81920 events => throughput is 2.89E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! 
xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.948781e+06 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.293600e+06 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +524,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x1_cudacpp > /tmp/valassia/output_susyggt1t1_x1_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x1_cudacpp > /tmp/avalassi/output_susyggt1t1_x1_cudacpp' DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3045 [0.30449452360186241] fbridge_mode=1 + [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 [UNWEIGHT] Wrote 1612 events (found 1617 events) - [COUNTERS] PROGRAM TOTAL : 0.6589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6463s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0050s for 8192 events => throughput is 1.63E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0076s + [COUNTERS] PROGRAM TOTAL : 0.8489s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8453s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30449452343426120) and hip (0.30449452360186241) differ by less than 2E-4 (5.504243727472158e-10) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +559,69 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggt1t1_x10_cudacpp > /tmp/valassia/output_susyggt1t1_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30747310720557375] fbridge_mode=1 + [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 [UNWEIGHT] Wrote 1631 events (found 1636 events) - [COUNTERS] PROGRAM TOTAL : 1.3634s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3477s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0080s + [COUNTERS] PROGRAM TOTAL : 2.0195s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0110s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0079s for 81920 events => throughput is 1.03E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30747310722207288) and hip (0.30747310720557375) differ by less than 2E-4 (5.366040944920769e-11) +OK! 
xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.657161e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.199891e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.738885e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.433914e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.485774e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.488918e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.658400e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.917817e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.505719e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.520898e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.902088e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.908547e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.407832e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.523903e+07 ) sec^-1 *** EXECUTE 
GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.871336e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.248078e+08 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index abd64571cc..43f72c2971 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip + +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 - make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:14:36 +DATE: 2024-10-03_03:46:31 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.6834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6554s - [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8258s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7848s + [COUNTERS] Fortran MEs ( 1 ) : 0.0410s for 8192 events => throughput is 2.00E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3340s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3061s - [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.93E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4043s + [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3180s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0370s - [COUNTERS] Fortran MEs ( 1 ) : 0.2810s for 81920 events => throughput is 2.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9606s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5512s + [COUNTERS] Fortran MEs ( 1 ) : 0.4094s for 81920 events => throughput is 2.00E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3736s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3417s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0317s for 8192 events => throughput is 2.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4492s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4058s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846950) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.5164s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2023s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3139s for 81920 events => throughput is 2.61E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9704s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4318s for 81920 events => throughput is 1.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.678942e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.872222e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.090787e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.933993e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3336s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3148s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0186s for 8192 events => throughput is 4.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4050s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0246s for 8192 events => throughput is 3.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0744s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1850s for 81920 events => throughput is 4.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.8024s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5570s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2451s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.526680e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.358555e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.549791e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.302135e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846943] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3237s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3124s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0111s for 8192 events => throughput is 7.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4222s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0149s for 8192 events => throughput is 5.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641911695846943) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,9 +319,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -329,36 +329,110 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1545s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0467s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1076s for 81920 events => throughput is 7.62E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.6905s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5406s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1495s for 81920 events => throughput is 5.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.827533e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.319188e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.895903e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.338203e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4072s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6848s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5455s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1390s for 81920 events => throughput is 5.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.862092e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.876638e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +444,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +454,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.5972s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5827s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 0.4377s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4136s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0237s for 8192 events => throughput is 3.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641911695846950) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! 
events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +479,89 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_d_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7587s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5445s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2138s for 81920 events => throughput is 3.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.605581e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.598085e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8431s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3346s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 81920 events => throughput is 4.88E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0072s + [COUNTERS] PROGRAM TOTAL : 1.9864s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9768s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 81920 events => throughput is 9.29E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and hip (44.473264592444679) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.490585e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.051887e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.422055e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.338765e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.729175e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.900263e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.118093e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
7.747078e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.733332e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.880130e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.908378e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.996058e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.723052e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.898528e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.108063e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.732046e+07 ) 
sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index e7d3a0ecd8..ed21485c0d 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' - make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:14:57 +DATE: 2024-10-03_03:46:59 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.5798s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5518s - [COUNTERS] Fortran MEs ( 1 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7765s + [COUNTERS] Fortran MEs ( 1 ) : 0.0405s for 8192 events => throughput is 2.02E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3297s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3015s - [COUNTERS] Fortran MEs ( 1 ) : 0.0282s for 8192 events => throughput is 2.90E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4109s + [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.88E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3339s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0465s - [COUNTERS] Fortran MEs ( 1 ) : 0.2874s for 81920 events => throughput is 2.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9363s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5280s + [COUNTERS] Fortran MEs ( 1 ) : 0.4083s for 81920 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641905397892330] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.4267s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3987s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0278s for 8192 events => throughput is 2.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4067s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641905397892330) differ by less than 4E-4 (1.4107717127842534e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,38 +159,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473258075185306] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3244s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0461s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2781s for 81920 events => throughput is 2.95E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.9462s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5411s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4048s for 81920 events => throughput is 2.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473258075185306) differ by less than 4E-4 (1.465433093761348e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.992620e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.996508e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.033930e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.026268e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902617887730] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3220s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0130s for 8192 events => throughput is 6.30E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4206s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641902617887730) differ by less than 4E-4 (2.0335059314202653e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255619824656] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1816s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0519s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1296s for 81920 events => throughput is 6.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.7742s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1728s for 81920 events => throughput is 4.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473255619824656) differ by less than 4E-4 (2.0175312298587045e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.559069e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.652600e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.495969e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.627498e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641902771385062] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3141s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3075s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0065s for 8192 events => throughput is 1.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 0.4300s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4204s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0094s for 8192 events => throughput is 8.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641902771385062) differ by less than 4E-4 (1.9991218003223565e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473255186065366] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1050s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0407s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0642s for 81920 events => throughput is 1.28E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0001s + [COUNTERS] PROGRAM TOTAL : 1.6340s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5463s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0874s for 81920 events => throughput is 9.37E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473255186065366) differ by less than 4E-4 (2.1150638251921094e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.271021e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.151357e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.195524e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.236288e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4108s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4024s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 8192 events => throughput is 1.01E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6230s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0833s for 81920 events => throughput is 9.84E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 9.906699e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.013538e+06 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,30 +444,30 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! 
Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641905467548966] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6207s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6070s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0048s for 8192 events => throughput is 1.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s + [COUNTERS] PROGRAM TOTAL : 0.4217s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4097s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0117s for 8192 events => throughput is 6.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641905467548966) differ by less than 4E-4 (1.3951682953372568e-07) +OK! 
xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,67 +479,149 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_f_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473257658055729] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.4238s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4066s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 81920 events => throughput is 9.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s + [COUNTERS] PROGRAM TOTAL : 1.6717s + [COUNTERS] Fortran Overhead ( 0 ) : 
1.5584s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1131s for 81920 events => throughput is 7.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.876658e+05 ) sec^-1 -OK! xsec from fortran (44.473264592444679) and hip (44.473257658055729) differ by less than 4E-4 (1.5592264279717938e-07) +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.015744e+05 ) sec^-1 -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.9938s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.787408e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.110624e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.796448e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.475370e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.375567e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.948933e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.746471e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 
1.365477e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.552641e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.962850e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.832561e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.369650e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.125599e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 4.634262e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.244604e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.047453e+07 ) 
sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 18c795f9eb..14485e47cc 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -make USEBUILDDIR=1 BACKEND=hip +make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/pfs/lustrep3/scratch/project_465001114/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-10-04_14:15:14 +DATE: 2024-10-03_03:47:26 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: -Working directory (run): /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -49,18 +49,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.5725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5438s - [COUNTERS] Fortran MEs ( 1 ) : 0.0287s for 8192 events => throughput is 2.85E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8264s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7844s + [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -74,18 +74,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x1_fortran > /tmp/valassia/output_susyggtt_x1_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/avalassi/output_susyggtt_x1_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3510s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3222s - [COUNTERS] Fortran MEs ( 1 ) : 0.0289s for 8192 events => throughput is 2.84E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4404s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4003s + [COUNTERS] Fortran MEs ( 1 ) : 0.0401s for 8192 events => throughput is 2.04E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -99,18 +99,18 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./madevent_fortran < /tmp/valassia/input_susyggtt_x10_fortran > /tmp/valassia/output_susyggtt_x10_fortran' - [OPENMPTH] omp_get_max_threads/nproc = 1/128 +Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp/avalassi/output_susyggtt_x10_fortran' + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=0 + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3713s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0790s - [COUNTERS] Fortran MEs ( 1 ) : 0.2923s for 81920 events => throughput is 2.80E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.9467s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5401s + [COUNTERS] Fortran MEs ( 1 ) : 0.4066s for 81920 events => throughput is 2.01E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -124,24 +124,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404211] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3654s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3313s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 8192 events => throughput is 2.42E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4496s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 8192 events => throughput is 1.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404211) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -159,9 +159,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -169,28 +169,28 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.4488s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1187s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3299s for 81920 events => throughput is 2.48E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.9926s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5534s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4388s for 81920 events => throughput is 1.87E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.326687e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.887986e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.596950e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.905348e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -204,24 +204,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912938404225] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3417s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3211s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0204s for 8192 events => throughput is 4.01E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4296s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0240s for 8192 events => throughput is 3.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912938404225) differ by less than 2E-4 (2.7833872318083763e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -239,38 +239,38 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265850735238] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.2982s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1031s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1949s for 81920 events => throughput is 4.20E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7806s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5419s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2384s for 81920 events => throughput is 3.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265850735238) differ by less than 2E-4 (2.8293190679207214e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.481485e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.451620e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.531906e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.293351e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -284,24 +284,24 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.64 [44.641912966309015] fbridge_mode=1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.3413s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 8192 events => throughput is 7.70E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 0.4164s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4011s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0150s for 8192 events => throughput is 5.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and cpp (44.641912966309015) differ by less than 2E-4 (2.8458952971988083e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -319,46 +319,120 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- -Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.47 [44.473265882025295] fbridge_mode=1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.1605s - [COUNTERS] Fortran Overhead ( 0 ) : 1.0547s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1056s for 81920 events => throughput is 7.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s + [COUNTERS] PROGRAM TOTAL : 1.7047s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5555s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1489s for 81920 events => throughput is 5.50E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and cpp (44.473265882025295) differ by less than 2E-4 (2.899676077028346e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.023285e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.376926e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.043041e+05 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.203989e+05 ) sec^-1 -*** (2-512y) WARNING! SKIP MADEVENT_CPP (512y is not supported on this node) *** +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4271s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4126s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s -*** (2-512z) WARNING! SKIP MADEVENT_CPP (512z is not supported on this node) *** +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -*** (3-cuda) WARNING! SKIP MADEVENT_CUDA (cuda is not supported on this node) *** +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) -*** (3-hip) EXECUTE MADEVENT_HIP x1 (create events.lhe) *** +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6888s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5521s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1364s for 81920 events => throughput is 6.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.987404e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 6.016756e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -370,9 +444,89 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x1_cudacpp > /tmp/valassia/output_susyggtt_x1_cudacpp' +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4227s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4021s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 8192 events => throughput is 4.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! 
ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7481s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5428s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2049s for 81920 events => throughput is 4.00E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.718921e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.666481e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! 
Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x1_cudacpp > /tmp/avalassi/output_susyggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -380,20 +534,20 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 [UNWEIGHT] Wrote 1617 events (found 1622 events) - [COUNTERS] PROGRAM TOTAL : 0.6047s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5899s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0058s for 8192 events => throughput is 1.41E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s + [COUNTERS] PROGRAM TOTAL : 0.8408s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8369s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x1 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.641911695846950) and hip (44.641911674225568) differ by less than 2E-4 (4.843292433776014e-10) +OK! 
xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) -*** (3-hip) Compare MADEVENT_HIP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.1 and events.lhe.ref.1 are identical +OK! events.lhe.cuda.1 and events.lhe.ref.1 are identical -*** (3-hip) EXECUTE MADEVENT_HIP x10 (create events.lhe) *** +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** -------------------- CUDACPP_RUNTIME_FBRIDGEMODE = (not set) CUDACPP_RUNTIME_VECSIZEUSED = 8192 @@ -405,9 +559,9 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 0 ! Helicity Sum/event 0=exact 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- -Executing ' ./build.hip_m_inl0_hrd0/madevent_hip < /tmp/valassia/input_susyggtt_x10_cudacpp > /tmp/valassia/output_susyggtt_x10_cudacpp' +Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } - [OPENMPTH] omp_get_max_threads/nproc = 1/128 + [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE @@ -415,57 +569,59 @@ DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 [UNWEIGHT] Wrote 1622 events (found 1627 events) - [COUNTERS] PROGRAM TOTAL : 1.3396s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3143s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0167s for 81920 events => throughput is 4.91E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s + [COUNTERS] PROGRAM TOTAL : 2.0041s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9943s + 
[COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s -*** (3-hip) Compare MADEVENT_HIP x10 xsec to MADEVENT_FORTRAN xsec *** +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.473264592444679) and hip (44.473264587763374) differ by less than 2E-4 (1.0526113314313079e-10) +OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) -*** (3-hip) Compare MADEVENT_HIP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** -OK! events.lhe.hip.10 and events.lhe.ref.10 are identical +OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.486525e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.043134e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.410712e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.399822e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 
17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.737904e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.879175e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.127465e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.546320e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.738843e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.879320e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.899456e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.922385e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.713230e+07 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.873490e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.120015e+06 ) sec^-1 +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.754671e+07 ) sec^-1 + +*** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 43da6e9aa5..c3f0ed1d47 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:23:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:21:05 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.209600e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.872254e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.989444e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.535787 sec -INFO: No Floating Point Exceptions have been reported - 1,434,722,098 cycles:u # 2.644 GHz (74.58%) - 2,578,399 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.56%) - 6,866,717 
stalled-cycles-backend:u # 0.48% backend cycles idle (75.30%) - 2,088,564,042 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (74.67%) - 0.599328986 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.114935e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.582761e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.939652e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.823338 sec +INFO: No Floating Point Exceptions have been reported + 2,781,829,840 cycles # 2.927 GHz + 4,278,879,817 instructions # 1.54 insn per cycle + 1.128949739 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.383707e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.589135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.589135e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.278206 sec -INFO: No Floating Point Exceptions have been reported - 17,739,462,314 cycles:u # 3.354 GHz (75.03%) - 50,106,117 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.04%) - 261,356,239 stalled-cycles-backend:u # 1.47% backend cycles idle (75.04%) - 47,091,390,697 instructions:u # 2.65 insn per cycle - # 0.01 stalled cycles per insn (75.04%) - 5.293316763 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.072198e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.251574e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.251574e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.292206 sec +INFO: No Floating Point Exceptions have been reported + 19,188,263,570 cycles # 3.045 GHz + 46,171,187,745 instructions # 2.41 insn per cycle + 6.302411306 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.029301e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.540119e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.540119e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.825031 sec -INFO: No Floating Point Exceptions have been reported - 12,681,894,597 cycles:u # 3.307 GHz (74.97%) - 50,229,914 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.97%) - 484,037,411 stalled-cycles-backend:u # 3.82% backend cycles idle (74.99%) - 31,763,793,252 instructions:u # 2.50 insn per cycle - # 0.02 stalled cycles per insn (74.99%) - 3.840009470 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.615174e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.112322e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.112322e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.331258 sec +INFO: No Floating Point Exceptions have been reported + 13,153,752,094 cycles # 3.031 GHz + 31,715,681,802 instructions # 2.41 insn per cycle + 4.341524872 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.799934e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.765940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.765940e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.969501 sec -INFO: No Floating Point Exceptions have been reported - 9,679,661,163 cycles:u # 3.249 GHz (74.96%) - 49,712,980 stalled-cycles-frontend:u # 0.51% frontend cycles idle (75.03%) - 904,119,408 stalled-cycles-backend:u # 9.34% backend cycles idle (75.03%) - 19,500,860,421 instructions:u # 2.01 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 2.983989983 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.026416e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.839154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.839154e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.529215 sec +INFO: No Floating Point Exceptions have been reported + 10,251,997,224 cycles # 2.897 GHz + 19,667,313,704 instructions # 1.92 insn per cycle + 3.539347005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.051463e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.907164e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.907164e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.495119 sec +INFO: No Floating Point Exceptions have been reported + 10,162,863,648 cycles # 2.902 GHz + 19,355,102,855 instructions # 1.90 insn per cycle + 3.505408660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.813583e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.421948e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.421948e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.895263 sec +INFO: No Floating Point Exceptions have been reported + 8,768,256,609 cycles # 2.246 GHz + 15,838,557,376 instructions # 1.81 insn per cycle + 3.905255721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt index 088a07a09d..a59f4a8bf6 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:13:53 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:59:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.856473e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.614655e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.614655e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.520594 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,145,006,763 cycles:u # 3.284 GHz (75.07%) - 219,222,569 stalled-cycles-frontend:u # 1.21% frontend cycles idle (75.06%) - 6,752,190,970 stalled-cycles-backend:u # 37.21% backend cycles idle (75.01%) - 16,698,321,112 instructions:u # 0.92 insn per cycle - # 0.40 stalled cycles per insn (74.89%) - 5.592402423 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.721261e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.941229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.941229e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.226356 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,271,743,384 cycles # 2.941 GHz + 12,922,647,058 instructions # 1.78 insn per cycle + 2.529249715 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 
1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.348917e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.547978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.547978e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.516587 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 18,320,160,243 cycles:u # 3.308 GHz (74.99%) - 49,931,362 stalled-cycles-frontend:u # 0.27% frontend cycles idle (75.01%) - 393,514,300 stalled-cycles-backend:u # 2.15% backend cycles idle (75.02%) - 47,323,149,472 instructions:u # 2.58 insn per cycle - # 0.01 stalled cycles per insn (75.02%) - 5.542562977 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.036468e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.202117e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.202117e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.678078 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 20,324,172,184 cycles # 3.040 GHz + 46,315,699,520 instructions # 2.28 insn per cycle + 6.685452158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.953054e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.422114e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.422114e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.095942 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 13,397,410,567 cycles:u # 3.254 GHz (74.94%) - 52,373,136 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.94%) - 529,306,431 stalled-cycles-backend:u # 3.95% backend cycles idle (74.94%) - 32,573,951,196 instructions:u # 2.43 insn per cycle - # 0.02 stalled cycles per insn (74.98%) - 4.122057791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.546402e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.989841e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.989841e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.681304 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 14,274,276,990 cycles # 3.045 GHz + 32,466,525,739 instructions # 2.27 insn per cycle + 4.688943771 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.673460e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.551032e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.551032e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.223521 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,320,162,030 cycles:u # 3.180 GHz (74.86%) - 40,080,497 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.92%) - 980,428,805 stalled-cycles-backend:u # 9.50% backend cycles idle (75.04%) - 20,354,090,333 instructions:u # 1.97 insn per cycle - # 0.05 stalled cycles per insn (75.10%) - 3.250249712 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.906327e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.606772e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.606772e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.924044 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,408,077,664 cycles # 2.903 GHz + 20,951,332,123 instructions # 1.84 insn per cycle + 3.931555912 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.914575e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.618914e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.618914e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.912846 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 11,210,840,615 cycles # 2.861 GHz + 20,624,082,345 instructions # 1.84 insn per cycle + 3.920179017 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.699169e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.222592e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222592e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.333799 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,037,060,432 cycles # 2.312 GHz + 16,902,306,877 instructions # 1.68 insn per cycle + 4.341202688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt index fca102346f..7ea35cfe0b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:19:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:11:54 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.192548e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.883371e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.001383e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.443145e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.507639e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.762000e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.724775 sec -INFO: No Floating Point 
Exceptions have been reported - 15,402,138,829 cycles:u # 3.262 GHz (75.11%) - 153,815,583 stalled-cycles-frontend:u # 1.00% frontend cycles idle (75.03%) - 6,739,435,463 stalled-cycles-backend:u # 43.76% backend cycles idle (74.83%) - 11,546,188,546 instructions:u # 0.75 insn per cycle - # 0.58 stalled cycles per insn (74.83%) - 4.783944753 seconds time elapsed +TOTAL : 1.336303 sec +INFO: No Floating Point Exceptions have been reported + 4,703,225,547 cycles # 3.001 GHz + 7,361,645,114 instructions # 1.57 insn per cycle + 1.625770729 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.360739e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.563330e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.563330e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.065605e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.242135e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242135e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.363109 sec -INFO: No Floating Point Exceptions have been reported - 17,972,582,951 cycles:u # 3.344 GHz (74.99%) - 49,074,506 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.99%) - 335,813,940 stalled-cycles-backend:u # 1.87% backend cycles idle (74.99%) - 47,138,026,721 instructions:u # 2.62 insn per cycle - # 0.01 stalled cycles per insn (75.00%) - 5.375753941 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.667816 sec +INFO: No Floating Point Exceptions have been reported + 20,174,215,158 cycles # 3.024 GHz + 46,194,433,450 instructions # 2.29 insn per cycle + 6.673472199 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.030468e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.536582e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.536582e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.621083e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.116265e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.116265e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.833998 sec -INFO: No Floating Point Exceptions have been reported - 12,664,916,265 cycles:u # 3.295 GHz (74.98%) - 50,300,295 stalled-cycles-frontend:u # 0.40% frontend cycles idle (75.04%) - 476,519,825 stalled-cycles-backend:u # 3.76% backend cycles idle (75.03%) - 31,722,956,771 instructions:u # 2.50 insn per cycle - # 0.02 stalled cycles per insn (75.03%) - 3.846513223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.640534 sec +INFO: No Floating Point Exceptions have been reported + 14,164,511,867 cycles # 3.049 GHz + 31,624,566,458 instructions # 2.23 insn per cycle + 4.646256052 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.795971e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.768024e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.768024e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.051763e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.893360e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.893360e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.979520 sec -INFO: No Floating Point Exceptions have been reported - 9,697,692,431 cycles:u # 3.243 GHz (74.87%) - 42,073,971 stalled-cycles-frontend:u # 0.43% frontend cycles idle (74.87%) - 927,318,016 stalled-cycles-backend:u # 9.56% backend cycles idle (75.00%) - 19,480,752,660 instructions:u # 2.01 insn per cycle - # 0.05 stalled cycles per insn (75.12%) - 2.991989434 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +TOTAL : 3.824965 sec +INFO: No Floating Point Exceptions have been reported + 11,267,126,218 cycles # 2.942 GHz + 19,489,192,245 instructions # 1.73 insn per cycle + 3.830677247 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = 
VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.087818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.945247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.945247e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 3.778924 sec +INFO: No Floating Point Exceptions have been reported + 11,081,632,446 cycles # 2.929 GHz + 18,949,715,150 instructions # 1.71 insn per cycle + 3.784626146 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.831176e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.441760e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.441760e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 +TOTAL : 4.203108 sec +INFO: No Floating Point Exceptions have been reported + 9,786,254,295 cycles # 2.326 GHz + 15,455,384,623 instructions # 1.58 insn per cycle + 4.208912505 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index 090b5c3f6a..9b9fa89512 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:17:32 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:06:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.128366e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.857659e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.974805e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.295975 sec -INFO: No Floating Point Exceptions have been reported - 17,592,799,954 cycles:u # 3.305 GHz (75.00%) - 182,786,945 stalled-cycles-frontend:u # 1.04% frontend cycles idle (75.03%) - 13,672,359 stalled-cycles-backend:u # 0.08% backend cycles idle (74.99%) - 15,972,251,030 instructions:u # 0.91 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 5.356420132 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.089648e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.586443e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.750079e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 1.885226 sec +INFO: No Floating Point Exceptions have been reported + 6,218,727,462 cycles # 2.936 GHz + 11,582,485,978 instructions # 1.86 insn per cycle + 2.174401796 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.385072e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.586518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.586518e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.273997 sec -INFO: No Floating Point Exceptions have been reported - 17,681,732,061 cycles:u # 3.346 GHz (75.02%) - 50,430,308 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.95%) - 248,748,061 stalled-cycles-backend:u # 1.41% backend cycles idle (74.95%) - 47,188,437,752 instructions:u # 2.67 insn per cycle - # 0.01 stalled cycles per insn (74.96%) - 5.286644699 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 472) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.072872e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.252789e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.252789e+06 
) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.263357 sec +INFO: No Floating Point Exceptions have been reported + 19,072,777,161 cycles # 3.043 GHz + 46,090,846,095 instructions # 2.42 insn per cycle + 6.269085049 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.008224e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.507631e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.507631e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.869010 sec -INFO: No Floating Point Exceptions have been reported - 12,789,221,933 cycles:u # 3.296 GHz (74.85%) - 51,318,726 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.94%) - 502,548,872 stalled-cycles-backend:u # 3.93% backend cycles idle (75.04%) - 31,779,945,697 instructions:u # 2.48 insn per cycle - # 0.02 stalled cycles per insn (75.05%) - 3.881584459 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1645) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.633315e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.140339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.140339e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.255223 sec +INFO: No Floating Point Exceptions have been reported + 13,020,735,219 cycles # 3.057 GHz + 
31,621,408,671 instructions # 2.43 insn per cycle + 4.260978065 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1662) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.790772e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.759694e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.759694e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.985788 sec -INFO: No Floating Point Exceptions have been reported - 9,705,175,626 cycles:u # 3.239 GHz (74.79%) - 42,542,630 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.79%) - 912,022,666 stalled-cycles-backend:u # 9.40% backend cycles idle (74.96%) - 19,486,481,816 instructions:u # 2.01 insn per cycle - # 0.05 stalled cycles per insn (75.09%) - 2.998243380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1897) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.046606e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.886962e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.886962e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.469317 sec +INFO: No Floating Point Exceptions have been reported + 10,147,691,110 cycles # 2.921 GHz + 19,588,780,648 instructions # 1.93 insn per cycle + 3.475349152 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 1909) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe is not 
supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.050953e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.887703e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.887703e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.468623 sec +INFO: No Floating Point Exceptions have been reported + 9,922,328,760 cycles # 2.860 GHz + 19,251,488,263 instructions # 1.94 insn per cycle + 3.474417423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1647) (512y: 180) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.831827e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.445212e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.445212e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.835346 sec +INFO: No Floating Point Exceptions have been reported + 8,636,609,147 cycles # 2.250 GHz + 15,756,094,199 
instructions # 1.82 insn per cycle + 3.841169289 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 871) (512y: 156) (512z: 1258) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index 14093880fb..c7621e6788 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:23:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:21:36 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.484097e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 
6.422933e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.563069e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.511661 sec -INFO: No Floating Point Exceptions have been reported - 1,398,188,345 cycles:u # 2.638 GHz (75.61%) - 2,461,273 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.89%) - 5,591,505 stalled-cycles-backend:u # 0.40% backend cycles idle (73.90%) - 2,145,158,950 instructions:u # 1.53 insn per cycle - # 0.00 stalled cycles per insn (73.31%) - 0.575076711 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.819349e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.631215e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.787548e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.671095 sec +INFO: No Floating Point Exceptions have been reported + 2,685,503,883 cycles # 2.965 GHz + 4,130,554,866 instructions # 1.54 insn per cycle + 0.966696272 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.382030e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.584869e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.584869e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.282948 sec -INFO: No Floating Point Exceptions have been reported - 17,777,735,792 cycles:u # 3.359 GHz (74.92%) - 49,448,707 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) - 832,744,629 stalled-cycles-backend:u # 4.68% backend cycles idle (75.01%) - 46,714,050,600 instructions:u # 2.63 insn per cycle - # 0.02 stalled cycles per insn (75.07%) - 5.298501325 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 489) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.052130e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.226989e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.226989e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.412537 sec +INFO: No Floating Point Exceptions have been reported + 19,391,019,124 cycles # 3.020 GHz + 46,154,292,436 instructions # 2.38 insn per cycle + 6.422732999 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.004010e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.485647e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.485647e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.870864 sec -INFO: No Floating Point Exceptions have been reported - 12,819,717,718 cycles:u # 3.303 GHz (74.92%) - 50,607,851 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.86%) - 359,477,038 stalled-cycles-backend:u # 2.80% backend cycles idle (74.96%) - 31,507,091,856 instructions:u # 2.46 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 3.885734591 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1605) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.588098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.081645e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.081645e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.407881 sec +INFO: No Floating Point Exceptions have been reported + 13,105,876,007 cycles # 2.967 GHz + 31,645,255,458 instructions # 2.41 insn per cycle + 4.418072899 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1648) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.740409e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.654964e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.654964e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.022461 sec -INFO: No Floating Point Exceptions have been reported - 9,864,809,022 cycles:u # 3.253 GHz (74.94%) - 50,075,148 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.94%) - 293,036,909 stalled-cycles-backend:u # 2.97% backend cycles idle (74.96%) - 19,443,790,175 instructions:u # 1.97 insn per cycle - # 0.02 stalled cycles per insn (74.96%) - 3.037197737 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1860) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.035425e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.856170e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.856170e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.514751 sec +INFO: No Floating Point Exceptions have been reported + 10,258,432,986 cycles # 2.911 GHz + 19,657,134,826 instructions # 1.92 insn per cycle + 3.524456549 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1894) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165090E-002 Relative difference = 1.0277089176796747e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.060342e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.905129e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.905129e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.482974 sec +INFO: No Floating Point Exceptions have been reported + 10,093,367,565 cycles # 2.892 GHz + 19,361,669,894 instructions # 1.92 insn per cycle + 3.493075437 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165090E-002 +Relative difference = 1.0277089176796747e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.838118e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.475808e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.475808e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.849198 sec +INFO: No Floating Point Exceptions have been reported + 8,644,950,079 cycles # 2.241 GHz + 15,672,088,510 instructions # 1.81 insn per cycle + 3.859415675 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 833) (512y: 153) (512z: 1240) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt index 7fd5ea321f..54eb09f988 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:04:26 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:50:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.206650e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.859077e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.975637e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.533245 sec -INFO: No Floating Point Exceptions have been reported - 1,420,329,016 cycles:u # 2.584 GHz (76.76%) - 2,497,014 stalled-cycles-frontend:u # 0.18% frontend cycles idle (76.03%) - 12,053,500 stalled-cycles-backend:u # 0.85% backend cycles idle (75.55%) - 2,285,520,867 instructions:u # 1.61 insn per cycle - # 0.01 stalled cycles per insn (74.52%) - 0.596098577 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.126115e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.578363e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.801387e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.690273 sec +INFO: No Floating Point Exceptions have been reported + 2,735,433,860 cycles # 2.950 GHz + 4,273,045,275 instructions # 1.56 insn per cycle + 0.985887175 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165208E-002 -Relative difference = 1.0277079981222336e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.919696e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.340607e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.340607e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 4.018353 sec -INFO: No Floating Point Exceptions have been reported - 13,262,039,050 cycles:u # 3.291 GHz (75.00%) - 32,793,171 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.99%) - 186,423,621 stalled-cycles-backend:u # 1.41% backend cycles idle (74.99%) - 36,897,329,957 instructions:u # 2.78 insn per cycle - # 0.01 stalled cycles per insn (74.91%) - 4.034355011 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 679) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.661112e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.136857e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.136857e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.214528 sec +INFO: No Floating Point Exceptions have been reported + 12,808,005,477 cycles # 3.033 GHz + 32,654,262,253 instructions # 2.55 insn per cycle + 4.225073741 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.640706e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.573372e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.573372e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.111451 sec -INFO: No Floating Point Exceptions have been reported - 10,154,741,768 cycles:u # 3.252 GHz (74.92%) - 49,697,741 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.92%) - 89,970,819 stalled-cycles-backend:u # 0.89% backend cycles idle (74.90%) - 24,422,576,739 instructions:u # 2.41 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 3.126925503 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.051696e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.918485e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.918485e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.496269 sec +INFO: No Floating Point Exceptions have been reported + 10,653,047,507 cycles # 3.039 GHz + 24,982,853,721 instructions # 2.35 insn per cycle + 3.507179313 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1246) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.230451e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.583770e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.583770e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.678075 sec -INFO: No Floating Point Exceptions have been reported - 8,614,135,245 cycles:u # 3.203 GHz (74.99%) - 51,623,769 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.02%) - 111,017,559 stalled-cycles-backend:u # 1.29% backend cycles idle (75.02%) - 16,851,748,589 instructions:u # 1.96 insn per cycle - # 0.01 stalled cycles per insn (75.02%) - 2.694227101 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2981) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.258708e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344293e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.344293e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.213344 sec +INFO: No Floating Point Exceptions have been reported + 9,339,985,820 cycles # 2.898 GHz + 16,922,939,045 instructions # 1.81 insn per cycle + 3.223888003 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1599) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165090E-002 -Relative difference = 1.0277089176796747e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.344116e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.474330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.474330e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.104706 sec +INFO: No Floating Point Exceptions have been reported + 9,100,480,389 cycles # 2.922 GHz + 16,469,426,004 instructions # 1.81 insn per cycle + 3.115374973 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1355) (512y: 139) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.035984e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.833687e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.833687e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.516318 sec +INFO: No Floating Point Exceptions have been reported + 8,033,525,618 cycles # 2.278 GHz + 14,639,859,340 instructions # 1.82 insn per cycle + 3.527113937 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 946) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt index 78c37947fa..28c6ef0de9 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:04:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:50:57 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.487887e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.405993e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.545751e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.517724 sec -INFO: No Floating Point Exceptions have been reported - 1,408,399,442 cycles:u # 2.627 GHz (74.60%) - 2,508,628 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.69%) - 5,356,088 stalled-cycles-backend:u # 0.38% backend cycles idle (75.20%) - 2,221,238,384 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.71%) - 0.576331891 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.262862e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.524016e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778808e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.681785 sec +INFO: No Floating Point Exceptions have been reported + 2,742,251,071 cycles # 2.977 GHz + 4,303,655,049 instructions # 1.57 insn per cycle + 0.980574806 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039868165216E-002 -Relative difference = 1.0277079305077159e-08 +Avg ME (F77/GPU) = 1.2828039868165201E-002 +Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.697234e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.584139e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.584139e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.070213 sec -INFO: No Floating Point Exceptions have been reported - 9,981,745,626 cycles:u # 3.239 GHz (75.04%) - 49,772,542 stalled-cycles-frontend:u # 0.50% frontend cycles idle (75.08%) - 53,623,611 stalled-cycles-backend:u # 0.54% backend cycles idle (74.96%) - 28,300,840,364 instructions:u # 2.84 insn per cycle - # 0.00 stalled cycles per insn (74.96%) - 3.086278569 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 609) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.161225e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.040754e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.040754e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.335829 sec +INFO: No Floating Point Exceptions have been reported + 10,146,617,229 cycles # 3.033 GHz + 25,589,254,913 instructions # 2.52 insn per cycle + 3.346659723 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.951826e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.163421e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.163421e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.864877 sec -INFO: No Floating Point Exceptions have been reported - 9,264,715,688 cycles:u # 3.221 GHz (74.97%) - 49,378,464 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.97%) - 48,538,201 stalled-cycles-backend:u # 0.52% backend cycles idle (74.99%) - 21,312,934,455 instructions:u # 2.30 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 2.881181621 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2070) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.389684e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.653493e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.653493e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.061315 sec +INFO: No Floating Point Exceptions have been reported + 9,297,564,398 cycles # 3.028 GHz + 21,628,602,982 instructions # 2.33 insn per cycle + 3.072141619 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1112) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.453250e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.057430e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.057430e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.551172 sec -INFO: No Floating Point Exceptions have been reported - 8,156,601,641 cycles:u # 3.183 GHz (74.95%) - 48,682,113 stalled-cycles-frontend:u # 0.60% frontend cycles idle (75.03%) - 51,867,280 stalled-cycles-backend:u # 0.64% backend cycles idle (75.03%) - 15,737,675,973 instructions:u # 1.93 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 2.566825767 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2739) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.460349e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.734760e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.734760e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.978841 sec +INFO: No Floating Point Exceptions have been reported + 8,745,360,906 cycles # 2.926 GHz + 16,041,491,471 instructions # 1.83 insn per cycle + 2.989532515 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1497) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039868165086E-002 -Relative difference = 1.0277089447254817e-08 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.476083e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.781435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781435e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 2.970273 sec +INFO: No Floating Point Exceptions have been reported + 8,587,107,250 cycles # 2.881 GHz + 15,647,403,648 instructions # 1.82 insn per cycle + 2.981139555 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.122558e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.018467e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.018467e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.391235 sec +INFO: No Floating Point Exceptions have been reported + 7,801,685,793 cycles # 2.294 GHz + 14,376,558,537 instructions # 1.84 insn per cycle + 3.401770423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1031) (512y: 164) (512z: 876) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039868165088E-002 +Relative difference = 1.0277089312025782e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index e3dd1c6d17..c7851bae9b 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:23:37 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:22:06 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.415059e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.154679e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.333976e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.394459 sec -INFO: No Floating Point Exceptions have been reported - 1,037,476,022 cycles:u # 2.548 GHz (74.87%) - 2,409,202 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.64%) - 7,243,116 stalled-cycles-backend:u # 0.70% backend cycles idle (75.53%) - 2,070,988,901 instructions:u # 2.00 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 0.451216224 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.333916e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.720978e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.674302e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.576041 sec +INFO: No Floating Point Exceptions have been reported + 2,377,343,527 cycles # 2.962 GHz + 3,703,505,222 instructions # 1.56 insn per cycle + 0.861388802 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.630698e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914703e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914703e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.530429 sec -INFO: No Floating Point Exceptions have been reported - 15,220,726,582 cycles:u # 3.354 GHz (74.97%) - 39,030,379 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.97%) - 461,472,431 stalled-cycles-backend:u # 3.03% backend cycles idle (74.97%) - 47,145,457,833 instructions:u # 3.10 insn per cycle - # 0.01 stalled cycles per insn (74.98%) - 4.542602349 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.109379e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.311359e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311359e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.036148 sec +INFO: No Floating Point Exceptions have been reported + 18,304,223,591 cycles # 3.030 GHz + 45,024,500,068 instructions # 2.46 insn per cycle + 6.042994691 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.196871e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.565237e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.565237e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.643500 sec -INFO: No Floating Point Exceptions have been reported - 8,592,908,878 cycles:u # 3.242 GHz (74.95%) - 38,376,427 stalled-cycles-frontend:u # 0.45% frontend cycles idle (74.95%) - 1,214,006,248 stalled-cycles-backend:u # 14.13% backend cycles idle (74.95%) - 22,479,795,547 instructions:u # 2.62 insn per cycle - # 0.05 stalled cycles per insn (74.97%) - 2.655199075 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.299446e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533279e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533279e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.114429 sec +INFO: No Floating Point Exceptions have been reported + 9,418,027,973 cycles # 3.018 GHz + 22,310,907,211 instructions # 2.37 insn per cycle + 3.122195191 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.534852e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.157819e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.157819e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.450189 sec -INFO: No Floating Point Exceptions have been reported - 7,924,483,978 cycles:u # 3.225 GHz (74.96%) - 41,053,771 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.93%) - 1,736,678,490 stalled-cycles-backend:u # 21.92% backend cycles idle (74.93%) - 15,506,768,997 instructions:u # 1.96 insn per cycle - # 0.11 stalled cycles per insn (74.95%) - 2.461796003 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.483873e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.823583e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.823583e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.905968 sec +INFO: No Floating Point Exceptions have been reported + 8,476,323,738 cycles # 2.911 GHz + 15,781,236,641 instructions # 1.86 insn per cycle + 2.913223219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.502978e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.888551e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.888551e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.881646 sec +INFO: No Floating Point Exceptions have been reported + 8,393,499,476 cycles # 2.906 GHz + 15,616,953,644 instructions # 1.86 insn per cycle + 2.888818844 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.545557e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.922524e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.922524e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.843212 sec +INFO: No Floating Point Exceptions have been reported + 6,718,315,669 cycles # 2.359 GHz + 12,888,229,695 instructions # 1.92 insn per cycle + 2.850457369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt index 9bf252161c..407af2f83c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:14:14 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:00:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.260949e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091655e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091655e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.283278 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,560,883,148 cycles:u # 3.310 GHz (75.05%) - 112,550,536 stalled-cycles-frontend:u # 0.64% frontend cycles idle (75.07%) - 6,690,366,957 stalled-cycles-backend:u # 38.10% backend cycles idle (74.97%) - 16,597,038,187 instructions:u # 0.95 insn per cycle - # 0.40 stalled cycles per insn (74.88%) - 5.344260955 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.245423e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.983473e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.983473e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.688744 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,642,999,290 cycles # 2.936 GHz + 10,214,524,122 instructions # 1.81 insn per cycle + 1.977586864 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 
1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.611734e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.891508e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.891508e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.641800 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,468,183,515 cycles:u # 3.323 GHz (74.91%) - 38,886,191 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.95%) - 469,541,355 stalled-cycles-backend:u # 3.04% backend cycles idle (75.04%) - 47,266,056,863 instructions:u # 3.06 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 4.659113969 seconds time elapsed -=Symbols in 
CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.094603e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.288157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.288157e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.221630 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,928,122,768 cycles # 3.040 GHz + 45,157,983,866 instructions # 2.39 insn per cycle + 6.228889536 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.076386e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.328396e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.328396e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.798855 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,991,931,096 cycles:u # 3.198 GHz (74.97%) - 38,133,187 stalled-cycles-frontend:u # 0.42% frontend cycles idle (74.96%) - 1,257,710,731 stalled-cycles-backend:u # 13.99% backend cycles idle (74.84%) - 23,526,850,713 instructions:u # 2.62 insn per cycle - # 0.05 stalled cycles per insn (74.84%) - 2.816253896 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.221557e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.317309e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317309e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.330129 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,084,607,792 cycles # 3.023 GHz + 23,610,389,165 instructions # 2.34 insn per cycle + 3.337223492 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.330822e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.832750e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.832750e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.652366 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,497,901,482 cycles:u # 3.189 GHz (74.92%) - 41,697,449 stalled-cycles-frontend:u # 0.49% frontend cycles idle (75.06%) - 1,783,825,384 stalled-cycles-backend:u # 20.99% backend cycles idle (75.09%) - 16,496,010,163 instructions:u # 1.94 insn per cycle - # 0.11 stalled cycles per insn (75.09%) - 2.669683386 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.383113e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.593932e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.593932e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.129082 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,216,055,332 cycles # 2.939 GHz + 16,874,105,782 instructions # 1.83 insn per cycle + 3.136137450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.404313e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.669923e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.669923e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 3.107612 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,139,317,896 cycles # 2.935 GHz + 16,718,242,091 instructions # 1.83 insn per cycle + 3.114416427 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.422868e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.634285e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.634285e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 3.093334 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,456,855,936 cycles # 2.406 GHz + 14,072,286,974 instructions # 1.89 insn per cycle + 3.100340528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt index fe3846c47c..6e51eea5f0 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:19:40 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:12:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.386487e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.203073e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.390321e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371906e-02 +- 3.274477e-06 ) GeV^0 -TOTAL : 4.569485 sec -INFO: No Floating Point Exceptions have been reported - 15,043,606,254 cycles:u # 3.295 GHz (74.98%) - 53,934,412 stalled-cycles-frontend:u # 0.36% frontend cycles idle (75.11%) - 
6,692,579,126 stalled-cycles-backend:u # 44.49% backend cycles idle (75.07%) - 11,364,204,925 instructions:u # 0.76 insn per cycle - # 0.59 stalled cycles per insn (74.93%) - 4.621115624 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.219425e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.271393e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.274485e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0 +TOTAL : 1.184237 sec +INFO: No Floating Point Exceptions have been reported + 4,211,023,602 cycles # 2.994 GHz + 6,711,358,986 instructions # 1.59 insn per cycle + 1.464824370 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.633877e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.919079e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.919079e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.108754e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.311552e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.311552e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.526013 sec -INFO: No Floating Point Exceptions have been reported - 15,210,506,774 cycles:u # 3.356 GHz (74.94%) - 38,928,878 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.94%) - 450,561,554 stalled-cycles-backend:u # 2.96% backend cycles idle (74.95%) - 47,190,129,181 instructions:u # 3.10 insn per cycle - # 0.01 stalled cycles per insn (74.99%) - 4.534902488 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 6.372009 sec +INFO: No Floating Point Exceptions have been reported + 19,261,147,103 cycles # 3.021 GHz + 45,187,144,333 instructions # 2.35 insn per cycle + 6.377610836 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.168663e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.498737e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.498737e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.341796e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.585577e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.585577e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.665278 sec -INFO: No Floating Point Exceptions have been reported - 8,665,266,652 cycles:u # 3.243 GHz (74.87%) - 37,924,343 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.88%) - 1,195,884,985 stalled-cycles-backend:u # 13.80% backend cycles idle (74.96%) - 22,455,976,899 instructions:u # 2.59 insn per cycle - # 0.05 stalled cycles per insn (75.11%) - 2.674409242 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.380098 sec +INFO: No Floating Point Exceptions have been reported + 10,320,148,878 cycles # 3.049 GHz + 22,354,637,694 instructions # 2.17 insn per cycle + 3.385562983 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.531288e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.151978e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.151978e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.489756e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.828537e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.828537e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.454561 sec -INFO: No Floating Point Exceptions have been reported - 7,934,402,650 cycles:u # 3.225 GHz (74.98%) - 40,876,284 stalled-cycles-frontend:u # 0.52% frontend cycles idle (74.98%) - 1,742,906,133 stalled-cycles-backend:u # 21.97% backend cycles idle (74.97%) - 15,484,194,069 instructions:u # 1.95 insn per cycle - # 0.11 stalled cycles per insn (74.97%) - 2.463723139 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +TOTAL : 3.219462 sec +INFO: No Floating Point Exceptions have been reported + 9,424,957,911 cycles # 2.923 GHz + 15,663,887,385 instructions # 1.66 insn per cycle + 3.224887660 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = 
VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.514091e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.920313e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.920313e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.200138 sec +INFO: No Floating Point Exceptions have been reported + 9,405,049,933 cycles # 2.935 GHz + 15,298,078,322 instructions # 1.63 insn per cycle + 3.205675908 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.575381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.980148e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.980148e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 +TOTAL : 3.145944 sec +INFO: No Floating Point Exceptions have been reported + 7,690,829,828 cycles # 2.442 GHz + 12,573,137,118 instructions # 1.63 insn per cycle + 3.151480501 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index fce8e2dea5..e41f96f72e 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:17:52 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:06:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 12 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.143752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.098317e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.310156e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.371886e-02 +- 3.270260e-06 ) GeV^0 -TOTAL : 5.183411 sec -INFO: No Floating Point Exceptions have been reported - 17,254,799,796 cycles:u # 3.314 GHz (75.03%) - 113,518,720 stalled-cycles-frontend:u # 0.66% frontend cycles idle (75.06%) - 6,686,559,521 stalled-cycles-backend:u # 38.75% backend cycles idle (75.05%) - 16,253,572,458 instructions:u # 0.94 insn per cycle - # 0.41 stalled cycles per insn (75.07%) - 5.239982498 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 9.214771e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.300228e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.215505e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 +TOTAL : 1.471162 sec +INFO: No Floating Point Exceptions have been reported + 5,070,897,985 cycles # 2.995 GHz + 9,257,924,094 instructions # 1.83 insn per cycle + 1.751258093 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.601054e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.884721e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.884721e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.615704 sec -INFO: No Floating Point Exceptions have been reported - 15,491,641,339 cycles:u # 3.352 GHz (74.92%) - 37,931,607 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.93%) - 518,295,353 stalled-cycles-backend:u # 3.35% backend cycles idle (75.01%) - 47,106,508,620 instructions:u # 3.04 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 4.624339853 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 477) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.116110e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.316779e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.316779e+06 ) 
sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 5.995790 sec +INFO: No Floating Point Exceptions have been reported + 18,249,461,991 cycles # 3.042 GHz + 45,007,924,974 instructions # 2.47 insn per cycle + 6.001394527 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative 
difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.179057e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.517045e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.517045e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.659897 sec -INFO: No Floating Point Exceptions have been reported - 8,655,155,479 cycles:u # 3.246 GHz (74.83%) - 37,812,601 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.96%) - 1,186,940,794 stalled-cycles-backend:u # 13.71% backend cycles idle (75.10%) - 22,508,579,226 instructions:u # 2.60 insn per cycle - # 0.05 stalled cycles per insn (75.10%) - 2.668318748 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1920) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.333543e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.558339e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.558339e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.057214 sec +INFO: No Floating Point Exceptions 
have been reported + 9,287,290,653 cycles # 3.033 GHz + 22,273,732,814 instructions # 2.40 insn per cycle + 3.062726450 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1954) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.527163e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.155147e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.155147e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.457584 sec -INFO: No Floating Point Exceptions have been reported - 7,933,593,866 cycles:u # 3.220 GHz (75.00%) - 41,086,028 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.00%) - 1,740,967,447 stalled-cycles-backend:u # 21.94% backend cycles idle (75.00%) - 15,472,252,186 instructions:u # 1.95 insn per cycle - # 0.11 stalled cycles per insn (75.00%) - 2.466527154 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2556) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.502845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.836320e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.836320e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.876199 sec +INFO: No Floating Point Exceptions have been reported + 8,408,107,143 cycles # 2.919 GHz + 15,752,835,316 
instructions # 1.87 insn per cycle + 2.881789095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2565) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.499098e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.884933e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.884933e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.877505 sec +INFO: No Floating Point Exceptions have been reported + 8,358,416,525 cycles # 2.900 GHz + 15,588,323,205 instructions # 1.86 insn per cycle + 2.883031739 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2462) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.587399e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.988207e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.988207e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.795754 sec +INFO: No Floating Point Exceptions have been reported + 6,626,582,298 cycles # 2.366 GHz + 12,863,258,956 
instructions # 1.94 insn per cycle + 2.801279409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1698) (512y: 16) (512z: 1440) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052585973637E-002 +Relative difference = 2.0158743040564767e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 181a08d9c8..93cccb812d 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:23:51 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:22:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.519289e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.667605e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.910890e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.392354 sec -INFO: 
No Floating Point Exceptions have been reported - 1,045,582,005 cycles:u # 2.579 GHz (74.69%) - 2,411,477 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.64%) - 8,261,208 stalled-cycles-backend:u # 0.79% backend cycles idle (74.45%) - 2,082,907,116 instructions:u # 1.99 insn per cycle - # 0.00 stalled cycles per insn (73.57%) - 0.449523585 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.343706e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.862423e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.018725e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.575938 sec +INFO: No Floating Point Exceptions have been reported + 2,392,010,928 cycles # 2.956 GHz + 3,674,427,647 instructions # 1.54 insn per cycle + 0.866892917 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.644542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.934014e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.934014e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 4.500088 sec -INFO: No Floating Point Exceptions have been reported - 15,117,036,320 cycles:u # 3.354 GHz (74.98%) - 38,695,670 stalled-cycles-frontend:u # 0.26% frontend cycles idle (74.98%) - 701,327,415 stalled-cycles-backend:u # 4.64% backend cycles idle (74.98%) - 46,331,934,014 instructions:u # 3.06 insn per cycle - # 0.02 stalled cycles per insn (74.99%) - 4.511695894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 439) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.105467e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.308351e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.308351e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 6.065807 sec +INFO: No Floating Point Exceptions have been reported + 18,430,609,716 cycles # 3.036 GHz + 45,013,968,880 instructions # 2.44 insn per cycle + 6.072784911 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039854866802E-002 +Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.184636e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.549935e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.549935e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.648827 sec -INFO: No Floating Point Exceptions have been reported - 8,627,833,311 cycles:u # 3.249 GHz (75.00%) - 38,138,945 stalled-cycles-frontend:u # 0.44% frontend cycles idle (75.00%) - 1,113,458,421 stalled-cycles-backend:u # 12.91% backend cycles idle (75.00%) - 22,343,086,276 instructions:u # 2.59 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 2.660848486 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1874) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.308005e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.525687e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.525687e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.099771 sec +INFO: No Floating Point Exceptions have been reported + 9,387,612,417 cycles # 3.022 GHz + 22,262,525,785 instructions # 2.37 insn per cycle + 3.106925476 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.543143e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.175956e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.175956e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.461812 sec -INFO: No Floating Point Exceptions have been reported - 7,940,057,293 cycles:u # 3.215 GHz (74.96%) - 41,174,009 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.06%) - 1,882,846,184 stalled-cycles-backend:u # 23.71% backend cycles idle (75.06%) - 15,379,580,907 instructions:u # 1.94 insn per cycle - # 0.12 stalled cycles per insn (75.06%) - 2.475026898 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2501) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.403111e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688485e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688485e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.998210 sec +INFO: No Floating Point Exceptions have been reported + 8,478,264,746 cycles # 2.822 GHz + 15,771,817,686 instructions # 1.86 insn per cycle + 3.005389330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2540) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.519220e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.918776e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.918776e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.869953 sec +INFO: No Floating Point Exceptions have been reported + 8,393,268,013 cycles # 2.918 GHz + 15,616,623,130 instructions # 1.86 insn per cycle + 2.877528511 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2438) (512y: 10) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053255361738E-002 +Relative difference = 2.5376902468575066e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.552752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.947223e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.947223e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.838532 sec +INFO: No Floating Point Exceptions have been reported + 6,699,223,007 cycles # 2.355 GHz + 12,875,694,500 instructions # 1.92 insn per cycle + 2.846218721 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1669) (512y: 16) (512z: 1427) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052564145764E-002 +Relative difference = 1.9988585667912256e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index 77ba118279..c2fede3d2c 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:04:52 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:51:22 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.414008e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.126835e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.302976e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.399417 sec -INFO: No Floating Point Exceptions have been reported - 1,001,400,746 cycles:u # 2.433 GHz (75.66%) - 2,389,902 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.96%) - 7,897,841 stalled-cycles-backend:u # 0.79% backend cycles idle (73.56%) - 2,155,425,468 instructions:u # 2.15 insn per cycle - # 0.00 stalled cycles per insn (73.95%) - 0.459641971 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.237934e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.403884e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.415879e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.576926 sec +INFO: No Floating Point Exceptions have been reported + 2,374,711,860 cycles # 2.948 GHz + 3,718,677,413 instructions # 1.57 insn per cycle + 0.862944455 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.192494e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.739731e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.739731e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 3.544314 sec -INFO: No Floating Point Exceptions have been reported - 11,738,609,068 cycles:u # 3.305 GHz (75.00%) - 37,960,811 stalled-cycles-frontend:u # 0.32% frontend cycles idle (75.00%) - 1,904,176,289 stalled-cycles-backend:u # 16.22% backend cycles idle (75.00%) - 37,556,795,480 instructions:u # 3.20 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 3.556426727 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 705) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.667468e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.170854e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.170854e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 4.146636 sec +INFO: No Floating Point Exceptions have been reported + 12,261,145,046 cycles # 2.953 GHz + 32,316,842,246 instructions # 2.64 insn per cycle + 4.153494127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039543819614E-002 -Relative difference = 3.5561191488957804e-08 +Avg ME (F77/C++) = 1.2828039840314887E-002 +Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.858282e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.030493e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.030493e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.308738 sec -INFO: No Floating Point Exceptions have been reported - 7,435,914,224 cycles:u # 3.211 GHz (74.83%) - 39,808,129 stalled-cycles-frontend:u # 0.54% frontend cycles idle (74.82%) - 222,247,801 stalled-cycles-backend:u # 2.99% backend cycles idle (74.97%) - 18,452,473,674 instructions:u # 2.48 insn per cycle - # 0.01 stalled cycles per insn (75.13%) - 2.320549620 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2784) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.725444e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.600281e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.600281e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.681360 sec +INFO: No Floating Point Exceptions have been reported + 8,088,187,177 cycles # 3.009 GHz + 18,710,529,150 instructions # 2.31 insn per cycle + 2.688484326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1534) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039283704129E-002 +Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.889053e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.958140e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.958140e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.293183 sec -INFO: No Floating Point Exceptions have been reported - 7,366,115,678 cycles:u # 3.203 GHz (74.89%) - 43,337,125 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.96%) - 836,904,460 stalled-cycles-backend:u # 11.36% backend cycles idle (74.96%) - 14,165,019,880 instructions:u # 1.92 insn per cycle - # 0.06 stalled cycles per insn (74.99%) - 2.305245880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4304) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.859277e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.808400e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.808400e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.569037 sec +INFO: No Floating Point Exceptions have been reported + 7,549,873,391 cycles # 2.932 GHz + 14,270,632,476 instructions # 1.89 insn per cycle + 2.576072623 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2234) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053369958070E-002 -Relative difference = 2.627022867500074e-07 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.912318e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.926913e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.926913e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.529094 sec +INFO: No Floating Point Exceptions have been reported + 7,434,475,397 cycles # 2.932 GHz + 13,977,545,253 instructions # 1.88 insn per cycle + 2.536141283 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053244447801E-002 +Relative difference = 2.5291823782248813e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.641405e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.120039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.120039e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.753404 sec +INFO: No Floating Point Exceptions have been reported + 6,573,430,342 cycles # 2.382 GHz + 13,458,829,954 instructions # 2.05 insn per cycle + 2.760331688 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2073) (512y: 1) (512z: 1201) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052562326775E-002 +Relative difference = 1.997440588685788e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt index b9eaa981bd..42dc2f68f3 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_11:05:04 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:51:45 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK -FP precision = FLOAT (NaN/abnormal=1, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.517534e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.633499e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.873519e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.372027e-02 +- 3.270772e-06 ) GeV^0 -TOTAL : 0.394098 sec -INFO: No Floating Point Exceptions have been reported - 984,000,288 cycles:u # 2.417 GHz (75.54%) - 2,289,270 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.86%) - 6,710,527 stalled-cycles-backend:u # 0.68% backend cycles idle (74.60%) - 2,111,029,549 instructions:u # 2.15 insn per cycle - # 0.00 stalled cycles per insn (73.97%) - 0.450745849 seconds time elapsed +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.186843e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.656263e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 8.696977e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 +TOTAL : 0.581467 sec +INFO: No Floating Point Exceptions have been reported + 2,378,200,312 cycles # 2.946 GHz + 3,636,272,588 instructions # 1.53 insn per cycle + 0.866537822 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 -Avg ME (F77/GPU) = 1.2828036060454906E-002 -Relative difference = 1.251982371809749e-06 +Avg ME (F77/GPU) = 1.2828112125134794E-002 +Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 
13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.082552e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.290562e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.290562e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 2.716231 sec -INFO: No Floating Point Exceptions have been reported - 8,898,388,041 cycles:u # 3.267 GHz (74.93%) - 41,829,985 stalled-cycles-frontend:u # 0.47% frontend cycles idle (75.03%) - 29,489,710 stalled-cycles-backend:u # 0.33% backend cycles idle (75.03%) - 28,391,942,107 instructions:u # 3.19 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 2.728179465 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.269342e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.321851e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.321851e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 3.139062 sec +INFO: No Floating Point Exceptions have been reported + 9,447,844,635 cycles # 3.004 GHz + 25,728,895,866 instructions # 2.72 insn per cycle + 3.146180190 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039569285465E-002 -Relative difference = 3.357602059382168e-08 +Avg ME (F77/C++) = 1.2828039838495897E-002 +Relative difference = 1.2589928273811243e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] 
[inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.295051e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.197798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.197798e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 2.144958 sec -INFO: No Floating Point Exceptions have been reported - 6,871,672,269 cycles:u # 3.193 GHz (74.78%) - 38,823,881 stalled-cycles-frontend:u # 0.56% frontend cycles idle (74.90%) - 30,579,912 stalled-cycles-backend:u # 0.45% backend cycles idle (75.08%) - 16,529,674,900 instructions:u # 2.41 insn per cycle - # 0.00 stalled cycles per insn (75.10%) - 2.157104605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2423) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.082178e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.667437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.667437e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 +TOTAL : 2.412915 sec +INFO: No Floating Point Exceptions have been reported + 7,357,724,099 cycles # 3.042 GHz + 16,792,911,111 instructions # 2.28 insn per cycle + 2.419999040 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1311) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 -Avg ME (F77/C++) = 1.2828039385567536E-002 -Relative difference = 4.7897610623017996e-08 +Avg ME (F77/C++) = 1.2828039280066150E-002 +Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.100324e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.455573e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.455573e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 2.207336 sec -INFO: No Floating Point Exceptions have been reported - 7,073,444,737 cycles:u # 3.194 GHz (74.81%) - 42,370,010 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.88%) - 694,346,485 stalled-cycles-backend:u # 9.82% backend cycles idle (75.06%) - 13,519,186,690 instructions:u # 1.91 insn per cycle - # 0.05 stalled cycles per insn (75.08%) - 2.219078688 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3983) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.009521e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.244937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.244937e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.458445 sec +INFO: No Floating Point Exceptions have been reported + 7,244,876,322 cycles # 2.940 GHz + 13,685,401,521 instructions # 1.89 insn per cycle + 2.465610624 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2067) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 -Avg ME (F77/C++) = 1.2828053349949187E-002 -Relative difference = 2.611425108340261e-07 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] 
('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.056703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.398349e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.398349e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 +TOTAL : 2.426097 sec +INFO: No Floating Point Exceptions have been reported + 7,152,685,127 cycles # 2.941 GHz + 13,478,713,055 instructions # 1.88 insn per cycle + 2.433340778 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1935) (512y: 7) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828053220800939E-002 +Relative difference = 2.5107486628541925e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.725686e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.419420e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.419420e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 +TOTAL : 2.678328 sec +INFO: No Floating Point Exceptions have been reported + 6,471,041,764 cycles # 2.410 GHz + 13,198,051,679 instructions # 2.04 insn per cycle + 2.685585168 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 2) (512z: 1081) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282805e-02 +Avg ME (F77/C++) = 1.2828052536860923E-002 +Relative difference = 1.977588895209662e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 1f715ef8b5..2060fbedbb 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:24:04 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:22:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.206239e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.874491e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.991778e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.525215 sec -INFO: No Floating Point Exceptions have been reported - 1,408,443,106 cycles:u # 2.603 GHz (75.91%) - 2,359,037 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.93%) - 7,955,864 stalled-cycles-backend:u # 0.56% backend cycles idle (72.82%) - 2,289,766,618 instructions:u # 1.63 insn per cycle - # 0.00 stalled cycles per insn (74.01%) - 0.586272278 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.928121e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676063e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.875343e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.669424 sec +INFO: No Floating Point Exceptions have been reported + 2,687,042,079 cycles # 2.965 GHz + 4,204,109,883 instructions # 1.56 insn per cycle + 0.965175843 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590281E-002 -Relative difference = 7.67145406542181e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.390880e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.598202e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.598202e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.256363 sec -INFO: No Floating Point Exceptions have been reported - 17,676,945,035 cycles:u # 3.356 GHz (74.95%) - 52,327,703 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.95%) - 122,603,341 stalled-cycles-backend:u # 0.69% backend cycles idle (74.95%) - 47,500,992,681 instructions:u # 2.69 insn per cycle - # 0.00 stalled cycles per insn (74.95%) - 5.271945686 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 454) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.052853e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.226798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.226798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.407166 sec +INFO: No Floating Point Exceptions have been reported + 19,535,555,015 cycles # 3.045 GHz + 46,362,239,692 instructions # 2.37 insn per cycle + 6.417789931 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.079037e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.611959e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.611959e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.755414 sec -INFO: No Floating Point Exceptions have been reported - 12,399,293,680 cycles:u # 3.293 GHz (74.95%) - 49,795,094 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.93%) - 1,134,444,548 stalled-cycles-backend:u # 9.15% backend cycles idle (74.95%) - 31,491,925,278 instructions:u # 2.54 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 3.770015251 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1704) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.666136e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.232533e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.232533e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.230802 sec +INFO: No Floating Point Exceptions have been reported + 12,890,679,042 cycles # 3.040 GHz + 31,578,108,652 instructions # 2.45 insn per cycle + 4.240949908 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1731) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.765047e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.709049e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.709049e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.007196 sec -INFO: No Floating Point Exceptions have been reported - 9,786,968,009 cycles:u # 3.243 GHz (74.87%) - 50,806,066 stalled-cycles-frontend:u # 0.52% frontend cycles idle (75.00%) - 270,252,174 stalled-cycles-backend:u # 2.76% backend cycles idle (75.08%) - 19,298,900,833 instructions:u # 1.97 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 3.021795385 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2054) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.010640e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.821489e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.821489e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.563594 sec +INFO: No Floating Point Exceptions have been reported + 10,372,454,793 cycles # 2.902 GHz + 19,578,852,143 instructions # 1.89 insn per cycle + 3.574922628 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2045) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.069471e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.914096e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.914096e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.466182 sec +INFO: No Floating Point Exceptions have been reported + 10,155,286,917 cycles # 2.921 GHz + 19,386,130,150 instructions # 1.91 insn per cycle + 3.477475193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1799) (512y: 188) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.858221e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.512069e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.512069e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.816838 sec +INFO: No Floating Point Exceptions have been reported + 8,594,167,517 cycles # 2.246 GHz + 15,203,120,195 instructions # 1.77 insn per cycle + 3.827835521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 966) (512y: 154) (512z: 1330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt index 2140351b90..48c59a6c19 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-10-04_10:24:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:23:28 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.543426e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.535835e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.681413e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.371632e-02 +- 3.269165e-06 ) GeV^0 -TOTAL : 0.518055 sec -INFO: No Floating Point Exceptions have been reported - 1,400,628,963 cycles:u # 2.626 GHz (74.64%) - 2,439,477 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.12%) - 10,838,954 stalled-cycles-backend:u # 0.77% backend cycles idle (74.41%) - 2,170,699,040 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (75.69%) - 0.579296770 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.001883e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688202e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.868771e+08 ) sec^-1 
+MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 0.683910 sec +INFO: No Floating Point Exceptions have been reported + 2,716,417,669 cycles # 2.955 GHz + 4,171,561,022 instructions # 1.54 insn per cycle + 0.979523470 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 154 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 -Avg ME (F77/GPU) = 1.2828039901590284E-002 -Relative difference = 7.67145379496374e-09 +Avg ME (F77/GPU) = 1.2828039901590279E-002 +Relative difference = 7.671454200650844e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.389376e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.597120e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.597120e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 5.261993 sec -INFO: No Floating Point Exceptions have been reported - 17,636,830,308 cycles:u # 3.345 GHz (74.97%) - 50,117,030 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.97%) - 539,327,437 stalled-cycles-backend:u # 3.06% backend cycles idle (74.98%) - 47,039,999,877 instructions:u # 2.67 insn per cycle - # 0.01 stalled cycles per insn (74.97%) - 5.276599745 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 471) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.054705e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.228539e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228539e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 6.386240 sec +INFO: No Floating Point Exceptions have been reported + 19,440,857,068 cycles # 3.040 GHz + 46,292,428,054 instructions # 2.38 insn per cycle + 6.396172423 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.085003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.622247e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.622247e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 3.744645 sec -INFO: No Floating Point Exceptions have been reported - 12,398,692,512 cycles:u # 3.302 GHz (74.89%) - 50,378,852 stalled-cycles-frontend:u # 0.41% frontend cycles idle (74.89%) - 483,550,224 stalled-cycles-backend:u # 3.90% backend cycles idle (75.00%) - 31,116,176,638 instructions:u # 2.51 insn per cycle - # 0.02 stalled cycles per insn (75.07%) - 3.759135491 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1654) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.676436e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.220798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.220798e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 4.182593 sec +INFO: No Floating Point Exceptions have been reported + 12,700,648,520 cycles # 3.030 GHz + 31,544,456,287 instructions # 2.48 insn per cycle + 4.192353583 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1724) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal 
loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.789842e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.742527e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.742527e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0 -TOTAL : 2.983691 sec -INFO: No Floating Point Exceptions have been reported - 9,708,487,937 cycles:u # 3.243 GHz (74.91%) - 51,457,671 stalled-cycles-frontend:u # 0.53% frontend cycles idle (74.91%) - 665,719,250 stalled-cycles-backend:u # 6.86% backend cycles idle (74.93%) - 19,217,448,091 instructions:u # 1.98 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 2.998648091 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2008) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.967779e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.746605e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.746605e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.623519 sec +INFO: No Floating Point Exceptions have been reported + 10,490,743,681 cycles # 2.889 GHz + 19,585,261,086 instructions # 1.87 insn per cycle + 3.632834496 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.002208e+06 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 2.806194e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.806194e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.572021 sec +INFO: No Floating Point Exceptions have been reported + 10,103,456,274 cycles # 2.822 GHz + 19,279,378,017 instructions # 1.91 insn per cycle + 3.581949884 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1766) (512y: 191) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.930358e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.638228e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.638228e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 +TOTAL : 3.683083 sec +INFO: No Floating Point Exceptions have been reported + 8,384,754,211 cycles # 2.271 GHz + 15,047,526,015 instructions # 1.79 insn per cycle + 3.693325560 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 959) (512y: 155) (512z: 1296) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.282804e-02 +Avg ME (F77/C++) = 1.2828039951670679E-002 +Relative difference = 3.767475112924841e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index 262973dfc9..7468338173 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:24:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:23:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.795706e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.246793e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.263960e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.417372 sec -INFO: No Floating Point Exceptions have been reported - 1,002,312,084 cycles:u # 2.389 GHz (75.58%) - 2,537,157 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.39%) - 5,476,658 stalled-cycles-backend:u # 0.55% backend cycles idle (74.87%) - 1,589,322,484 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (74.00%) - 0.479214572 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.498098e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.405782e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004369e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 
3.413217e-03 ) GeV^0 +TOTAL : 0.530626 sec +INFO: No Floating Point Exceptions have been reported + 2,255,350,138 cycles # 2.943 GHz + 3,167,522,189 instructions # 1.40 insn per cycle + 0.824213544 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 
-Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.605688e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.669316e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.669316e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.202703 sec -INFO: No Floating Point Exceptions have been reported - 14,442,340,371 cycles:u # 3.427 GHz (74.96%) - 9,564,071 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.95%) - 3,687,995,069 stalled-cycles-backend:u # 25.54% backend cycles idle (74.96%) - 45,567,415,149 instructions:u # 3.16 insn per cycle - # 0.08 stalled cycles per insn (74.95%) - 4.218705673 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.886686e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.936500e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936500e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.692636 sec +INFO: No Floating Point Exceptions have been reported + 17,368,647,605 cycles # 3.046 GHz + 46,027,534,067 instructions # 2.65 insn per cycle + 5.703786393 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 
2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.346809e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.542455e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.542455e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.591690 sec -INFO: No Floating Point Exceptions have been reported - 8,826,253,844 cycles:u # 3.391 GHz (74.74%) - 8,716,002 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.92%) - 2,662,862,677 stalled-cycles-backend:u # 30.17% backend cycles idle (75.07%) - 27,731,598,930 instructions:u # 3.14 insn per cycle - # 0.10 stalled cycles per insn (75.11%) - 2.608471057 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.323966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.493999e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.493999e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.291463 sec +INFO: No Floating Point Exceptions have been 
reported + 10,086,066,895 cycles # 3.055 GHz + 27,948,730,669 instructions # 2.77 insn per cycle + 3.302659152 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.342765e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.872250e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.872250e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.606918 sec -INFO: No Floating Point Exceptions have been reported - 5,346,270,058 cycles:u # 3.304 GHz (74.83%) - 9,264,562 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.83%) - 108,531,822 stalled-cycles-backend:u # 2.03% backend cycles idle (74.83%) - 12,360,834,728 instructions:u # 2.31 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 1.623226321 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.198504e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.619384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.619384e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.156330 sec +INFO: No Floating Point Exceptions have been reported + 6,234,386,062 cycles # 2.877 GHz + 12,684,453,152 instructions # 2.03 insn per cycle + 2.167952608 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 
256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.685017e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.177140e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.177140e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.983940 sec +INFO: No Floating Point Exceptions have been reported + 5,724,695,862 cycles # 2.870 GHz + 12,129,787,940 instructions # 2.12 insn per cycle + 1.995450843 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.687151e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.892823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.892823e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.999450 sec +INFO: No Floating Point Exceptions have been reported + 5,896,077,322 cycles # 1.959 GHz + 8,395,996,491 instructions # 1.42 insn per cycle + 3.011053687 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 518b9cf636..5dd64826c7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' 
HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:14:33 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:01:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.823557e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.808700e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.808700e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.236557 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,745,324,778 cycles:u # 2.962 GHz (74.96%) - 37,169,072 stalled-cycles-frontend:u # 0.99% frontend cycles idle (74.92%) - 1,118,909,477 stalled-cycles-backend:u # 29.87% backend cycles idle (75.01%) - 3,914,941,106 instructions:u # 1.05 insn per cycle - # 0.29 stalled cycles per insn (75.05%) - 1.307544711 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.684703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.020852e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.020852e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.806676 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,134,464,403 cycles # 2.980 GHz + 4,838,192,243 instructions # 1.54 insn per cycle + 1.110475719 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 
3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.603139e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.666619e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.666619e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.287763 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 14,548,513,521 cycles:u # 3.375 GHz (74.95%) - 8,379,260 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.95%) - 3,683,639,771 stalled-cycles-backend:u # 25.32% backend cycles idle (74.95%) - 45,666,488,751 instructions:u # 3.14 insn per cycle - # 0.08 stalled cycles per insn (75.01%) - 4.314628971 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) 
= ( 1.866865e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.914803e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914803e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.802666 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 17,655,301,343 cycles # 3.040 GHz + 46,001,555,857 instructions # 2.61 insn per cycle + 5.809509158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.316630e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.510001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.510001e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.694596 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,944,223,279 cycles:u # 3.292 GHz (74.99%) - 8,617,600 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.00%) - 2,703,947,139 stalled-cycles-backend:u # 30.23% backend cycles idle (74.98%) - 27,960,252,014 instructions:u # 3.13 insn per cycle - # 0.10 stalled cycles per insn (74.98%) - 2.721625196 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.287541e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.450328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450328e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.381050 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,321,096,155 cycles # 3.046 GHz + 28,032,087,820 instructions # 2.72 insn per cycle + 3.388593541 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.263697e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.782608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.782608e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.711012 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,503,749,219 cycles:u # 3.175 GHz (74.90%) - 9,739,850 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.08%) - 127,747,937 stalled-cycles-backend:u # 2.32% backend cycles idle (75.12%) - 12,548,320,264 instructions:u # 2.28 insn per cycle - # 0.01 stalled cycles per insn (75.12%) - 1.738305886 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.088715e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.474660e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.474660e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.249251 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,455,426,136 cycles # 2.862 GHz + 12,868,987,997 instructions # 1.99 insn per cycle + 2.256773746 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.518930e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.971845e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.971845e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.089965 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,014,910,839 cycles # 2.869 GHz + 12,312,588,648 instructions # 2.05 insn per cycle + 2.097490367 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.612291e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.802715e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.802715e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.094965 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,077,458,214 cycles # 1.960 GHz + 8,540,885,730 instructions # 1.41 insn per cycle + 3.102450264 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index 5ebe35f44d..fb067a4517 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:19:57 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:12:54 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.766101e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.257804e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.275208e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.237979e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.266698e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.961441e+07 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.082268 sec -INFO: No Floating Point Exceptions have been reported - 3,316,553,469 cycles:u # 3.004 GHz (74.94%) - 27,319,938 stalled-cycles-frontend:u # 0.82% frontend cycles idle (75.45%) - 1,100,706,908 stalled-cycles-backend:u # 33.19% backend cycles idle (75.01%) - 3,007,525,955 instructions:u # 0.91 insn per cycle - # 0.37 stalled cycles per insn (75.13%) - 1.143449305 seconds time elapsed +TOTAL : 0.625341 
sec +INFO: No Floating Point Exceptions have been reported + 2,549,638,677 cycles # 2.971 GHz + 3,713,912,250 instructions # 1.46 insn per cycle + 0.915676485 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 
2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.551130e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.612314e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.612314e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.890077e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940474e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940474e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.291889 sec -INFO: No Floating Point Exceptions have been reported - 14,756,742,045 cycles:u # 3.429 GHz (74.92%) - 9,912,834 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.90%) - 3,604,260,866 stalled-cycles-backend:u # 24.42% backend cycles idle (74.93%) - 45,550,999,396 instructions:u # 3.09 insn per cycle - # 0.08 stalled cycles per insn (75.04%) - 4.305522360 
seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.720362 sec +INFO: No Floating Point Exceptions have been reported + 17,428,970,068 cycles # 3.044 GHz + 45,948,811,639 instructions # 2.64 insn per cycle + 5.726910837 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.339443e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.532754e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.532754e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.312122e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.481190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.481190e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.597570 sec -INFO: No Floating Point Exceptions have been reported - 8,833,336,540 cycles:u # 3.386 GHz (74.72%) - 8,771,257 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.91%) - 2,674,666,106 stalled-cycles-backend:u # 30.28% backend cycles idle (75.07%) - 27,707,773,853 instructions:u # 3.14 insn per cycle - # 0.10 stalled cycles per insn (75.16%) - 2.611099203 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 3.331505 sec +INFO: No Floating Point Exceptions have been reported + 10,154,233,518 cycles # 3.043 GHz + 27,846,201,009 instructions # 2.74 insn per cycle + 3.337417969 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.344690e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.876141e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.876141e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.219886e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.630778e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.630778e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.607135 sec -INFO: No Floating Point Exceptions have been reported - 5,353,107,590 cycles:u # 3.307 GHz (74.60%) - 9,513,922 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.59%) - 107,449,573 stalled-cycles-backend:u # 2.01% backend cycles idle (74.86%) - 12,332,779,751 instructions:u # 2.30 insn per cycle - # 0.01 stalled cycles per insn (75.11%) - 1.620726713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +TOTAL : 2.174947 sec +INFO: No Floating Point Exceptions have been reported + 6,305,944,181 cycles # 2.892 GHz + 12,563,017,456 instructions # 1.99 insn per cycle + 2.180991635 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) 
= ( 5.718682e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.205781e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.205781e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.000335 sec +INFO: No Floating Point Exceptions have been reported + 5,780,250,424 cycles # 2.882 GHz + 11,971,200,140 instructions # 2.07 insn per cycle + 2.006264960 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.757157e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.962049e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.962049e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 +TOTAL : 2.956645 sec +INFO: No Floating Point Exceptions have been reported + 5,909,728,884 cycles # 1.996 GHz + 8,241,949,857 instructions # 1.39 insn per cycle + 2.962494747 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index 40155e52c1..cfdfd81d8b 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:18:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:07:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.508342e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.243101e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.260292e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.182059 sec -INFO: No Floating Point Exceptions have been reported - 3,627,604,642 cycles:u # 3.004 GHz (75.55%) - 36,593,388 stalled-cycles-frontend:u # 1.01% frontend cycles idle (75.05%) - 1,113,204,395 stalled-cycles-backend:u # 30.69% backend cycles idle (74.26%) - 3,905,912,620 instructions:u # 1.08 insn per cycle - # 0.29 stalled cycles per insn (74.27%) - 1.239357966 seconds 
time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.943490e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.339371e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.984539e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.708663 sec +INFO: No Floating Point Exceptions have been reported + 2,814,351,890 cycles # 2.973 GHz + 4,386,424,355 instructions # 1.56 insn per cycle + 1.004249462 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] 
-Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.603155e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.666704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.666704e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.208931 sec -INFO: No Floating Point Exceptions have been reported - 14,449,569,654 cycles:u # 3.424 GHz (74.98%) - 9,335,274 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.98%) - 3,641,385,463 stalled-cycles-backend:u # 25.20% backend cycles idle (74.98%) - 45,573,624,021 instructions:u # 3.15 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 4.222621372 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 656) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.883485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.932448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932448e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.672690 sec +INFO: No Floating Point Exceptions have been reported + 17,267,443,034 cycles # 3.041 GHz + 45,934,071,651 instructions # 2.66 insn per cycle + 5.678248544 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': 
SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.337190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.531828e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.531828e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.598412 sec -INFO: No Floating Point Exceptions have been reported - 8,806,856,659 cycles:u # 3.374 GHz (74.89%) - 9,071,023 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.88%) - 2,680,361,322 stalled-cycles-backend:u # 30.43% backend cycles idle (74.91%) - 27,742,238,202 instructions:u # 3.15 insn per cycle - # 0.10 stalled cycles per insn (75.06%) - 2.611954894 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2456) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.312433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.476769e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.476769e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.271929 sec +INFO: No Floating Point Exceptions have been reported + 9,963,025,400 cycles # 3.040 GHz + 27,846,624,194 instructions # 2.79 insn per cycle + 3.277897304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] 
('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.332743e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.867606e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.867606e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.610457 sec -INFO: No Floating Point Exceptions have been reported - 5,332,808,909 cycles:u # 3.288 GHz (74.85%) - 9,552,700 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.89%) - 125,813,916 stalled-cycles-backend:u # 2.36% backend cycles idle (74.89%) - 12,389,288,629 instructions:u # 2.32 insn per cycle - # 0.01 stalled cycles per insn (74.86%) - 1.624079398 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.239087e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.651240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.651240e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.106521 sec +INFO: No Floating Point Exceptions have been reported + 6,082,880,254 cycles # 2.881 GHz + 12,580,112,604 instructions # 2.07 insn per cycle + 2.112469814 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.713560e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
6.205418e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.205418e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.942615 sec +INFO: No Floating Point Exceptions have been reported + 5,598,784,098 cycles # 2.875 GHz + 12,021,854,440 instructions # 2.15 insn per cycle + 1.948464491 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.721108e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921919e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.921919e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.924395 sec +INFO: No Floating Point Exceptions have been reported + 5,709,016,650 cycles # 1.949 GHz + 8,292,946,160 instructions # 1.45 insn per cycle + 2.930717532 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index 1139a514e8..e452755d81 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:24:48 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:24:23 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.868420e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.360333e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.379306e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.396279 sec -INFO: No Floating Point Exceptions have been reported - 1,007,854,239 cycles:u # 2.437 GHz (75.90%) - 2,351,504 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.22%) - 11,822,469 stalled-cycles-backend:u # 1.17% backend cycles idle (74.03%) - 1,547,822,021 instructions:u # 1.54 insn per cycle - # 0.01 stalled cycles per insn (74.48%) - 0.451945393 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.448581e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.354023e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002210e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 
3.413217e-03 ) GeV^0 +TOTAL : 0.528267 sec +INFO: No Floating Point Exceptions have been reported + 2,275,766,454 cycles # 2.946 GHz + 3,236,087,959 instructions # 1.42 insn per cycle + 0.829364074 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 
-Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.662256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.729946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.729946e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.117486 sec -INFO: No Floating Point Exceptions have been reported - 14,122,801,366 cycles:u # 3.420 GHz (75.01%) - 8,761,903 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 286,825,352 stalled-cycles-backend:u # 2.03% backend cycles idle (75.01%) - 44,420,019,295 instructions:u # 3.15 insn per cycle - # 0.01 stalled cycles per insn (75.01%) - 4.133446933 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.936081e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.988461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.988461e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.552727 sec +INFO: No Floating Point Exceptions have been reported + 16,901,199,171 cycles # 3.038 GHz + 45,022,482,452 instructions # 2.66 insn per cycle + 5.563984445 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 
2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.599556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.816704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.816704e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.458740 sec -INFO: No Floating Point Exceptions have been reported - 8,332,546,922 cycles:u # 3.374 GHz (74.95%) - 9,140,076 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.06%) - 623,618,114 stalled-cycles-backend:u # 7.48% backend cycles idle (75.06%) - 26,731,412,858 instructions:u # 3.21 insn per cycle - # 0.02 stalled cycles per insn (75.06%) - 2.474800682 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2266) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.485422e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.673978e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.673978e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.149592 sec +INFO: No Floating Point Exceptions have been reported + 
9,645,674,288 cycles # 3.052 GHz + 26,795,751,605 instructions # 2.78 insn per cycle + 3.161004757 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 
2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.604213e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.030498e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.030498e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.766196 sec -INFO: No Floating Point Exceptions have been reported - 5,918,186,168 cycles:u # 3.330 GHz (74.65%) - 9,909,098 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.61%) - 1,417,938,134 stalled-cycles-backend:u # 23.96% backend cycles idle (74.91%) - 14,155,302,337 instructions:u # 2.39 insn per cycle - # 0.10 stalled cycles per insn (75.14%) - 1.781864242 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.736441e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.083709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.083709e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.353548 sec +INFO: No Floating Point Exceptions have been reported + 6,761,037,249 cycles # 2.860 GHz + 14,228,059,801 instructions # 2.10 insn per cycle + 2.365157520 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2711) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.968829e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.344780e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.344780e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.247383 sec +INFO: No Floating Point Exceptions have been reported + 6,510,703,452 cycles # 2.883 GHz + 13,816,231,944 instructions # 2.12 insn per cycle + 2.258945119 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2360) (512y: 298) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.569827e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.756116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.756116e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.073181 sec +INFO: No Floating Point Exceptions have been reported + 6,036,497,255 cycles # 1.958 GHz + 10,155,247,558 instructions # 1.68 insn per cycle + 
3.084089287 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index d076826ea5..3f301e0024 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' 
HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:05:15 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:52:06 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.783361e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.238053e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.255152e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.409960 sec -INFO: No Floating Point Exceptions have been reported - 983,336,857 cycles:u # 2.303 GHz (76.14%) - 2,469,457 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.18%) - 4,976,926 stalled-cycles-backend:u # 0.51% backend cycles idle (74.79%) - 
1,616,266,414 instructions:u # 1.64 insn per cycle - # 0.00 stalled cycles per insn (74.74%) - 0.471255682 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.340998e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.340259e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003199e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.527026 sec +INFO: No Floating Point Exceptions have been reported + 2,260,619,407 cycles # 2.959 GHz + 3,198,102,043 instructions # 1.41 insn per cycle + 0.820578908 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.013156e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.100079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.100079e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.658896 sec -INFO: No Floating Point Exceptions have been reported - 12,513,156,258 cycles:u # 3.408 GHz (74.95%) - 9,245,117 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.96%) - 4,143,339,835 stalled-cycles-backend:u # 33.11% backend cycles idle (74.95%) - 35,233,343,785 instructions:u # 2.82 insn per cycle - # 0.12 stalled cycles per insn (74.97%) - 3.675945427 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 885) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.506708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.593742e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.593742e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.317728 sec +INFO: No Floating Point Exceptions have been reported + 13,126,642,398 cycles # 3.033 GHz + 34,433,015,624 instructions # 2.62 insn per cycle + 4.328677433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.636349e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.855133e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.855133e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.442307 sec -INFO: No Floating Point Exceptions have been reported - 8,249,817,051 cycles:u # 3.361 GHz (74.93%) - 9,147,408 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.93%) - 1,535,876,861 stalled-cycles-backend:u # 18.62% backend cycles idle (74.91%) - 21,739,807,224 instructions:u # 2.64 insn per cycle - # 0.07 stalled cycles per insn (74.98%) - 2.458718626 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.048635e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.191144e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.191144e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.577251 sec +INFO: No Floating Point Exceptions have been reported + 10,804,930,606 cycles # 3.011 GHz + 24,342,813,964 instructions # 2.25 insn per cycle + 3.588852357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.777304e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.226878e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.226878e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.727248 sec -INFO: No Floating Point Exceptions have been reported - 5,769,321,383 cycles:u # 3.317 GHz (74.76%) - 9,128,614 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.96%) - 1,712,066,843 stalled-cycles-backend:u # 29.68% backend cycles idle (75.17%) - 11,985,793,290 instructions:u # 2.08 insn per cycle - # 0.14 stalled cycles per insn (74.96%) - 1.744117750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3012) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.768382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.111158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.111158e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.336794 sec +INFO: No Floating Point Exceptions have been reported + 6,749,191,802 cycles # 2.875 GHz + 12,499,645,150 instructions # 1.85 insn per cycle + 2.348240674 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3115) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.125412e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.517975e+05 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.517975e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.179421 sec +INFO: No Floating Point Exceptions have been reported + 6,250,432,884 cycles # 2.855 GHz + 11,637,371,150 instructions # 1.86 insn per cycle + 2.190039392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2644) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.990556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.222673e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.222673e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.765356 sec +INFO: No Floating Point Exceptions have been reported + 5,500,150,684 cycles # 1.982 GHz + 9,392,876,056 instructions # 1.71 insn per cycle + 2.776424500 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2099) (512y: 282) (512z: 1958) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index fa4a6a7e86..be2a10e541 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:05:26 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:52:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.851942e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.349824e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.368613e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.400885 sec -INFO: No Floating Point Exceptions have been reported - 1,014,543,247 cycles:u # 2.418 GHz (75.45%) - 2,334,024 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.92%) - 5,645,449 stalled-cycles-backend:u # 0.56% backend cycles idle (76.86%) - 1,545,347,292 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (73.89%) - 0.463602690 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.338457e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.391663e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003521e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 
3.413217e-03 ) GeV^0 +TOTAL : 0.525273 sec +INFO: No Floating Point Exceptions have been reported + 2,295,553,727 cycles # 2.964 GHz + 3,280,425,227 instructions # 1.43 insn per cycle + 0.830798805 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 
-Avg ME (F77/GPU) = 2.0288063388516817 -Relative difference = 3.258803416564443e-07 +Avg ME (F77/GPU) = 2.0288063388516822 +Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.568775e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.690176e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.690176e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.120636 sec -INFO: No Floating Point Exceptions have been reported - 10,616,092,847 cycles:u # 3.388 GHz (74.98%) - 9,061,560 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.98%) - 143,862,654 stalled-cycles-backend:u # 1.36% backend cycles idle (75.00%) - 34,765,673,828 instructions:u # 3.27 insn per cycle - # 0.00 stalled cycles per insn (74.99%) - 3.137819570 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 408) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 2.661937e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.759812e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.759812e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 4.074785 sec +INFO: No Floating Point Exceptions have been reported + 12,438,640,427 cycles # 3.045 GHz + 35,010,031,379 instructions # 2.81 insn per cycle + 4.085812214 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 
2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.034823e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.297045e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.297045e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.262756 sec -INFO: No Floating Point Exceptions have been reported - 7,641,580,700 cycles:u # 3.359 GHz (74.96%) - 9,121,529 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.04%) - 1,914,691,093 stalled-cycles-backend:u # 25.06% backend cycles idle (75.03%) - 21,062,439,124 instructions:u # 2.76 insn per cycle - # 0.09 stalled cycles per insn (75.03%) - 2.279387532 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.097398e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.243177e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.243177e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.521928 sec +INFO: No Floating Point Exceptions have been 
reported + 10,753,008,888 cycles # 3.045 GHz + 23,438,472,557 instructions # 2.18 insn per cycle + 3.532739913 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2378) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.381598e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.919001e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.919001e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.601135 sec -INFO: No Floating Point Exceptions have been reported - 5,329,144,968 cycles:u # 3.303 GHz (74.80%) - 9,061,646 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.72%) - 1,024,318,548 stalled-cycles-backend:u # 19.22% backend cycles idle (74.86%) - 11,328,230,141 instructions:u # 2.13 insn per cycle - # 0.09 stalled cycles per insn (75.11%) - 1.617993241 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2332) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.175589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.585353e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.585353e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.163821 sec +INFO: No Floating Point Exceptions have been reported + 6,187,478,021 cycles # 2.846 GHz + 11,963,155,641 instructions # 1.93 insn per cycle + 2.174767157 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2468) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 
256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.198229e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.610952e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.610952e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.154188 sec +INFO: No Floating Point Exceptions have been reported + 6,208,478,460 cycles # 2.868 GHz + 11,196,014,039 instructions # 1.80 insn per cycle + 2.165281437 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2098) (512y: 174) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.145182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.398127e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.398127e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.669310 sec +INFO: No Floating Point Exceptions have been reported + 5,332,222,689 cycles # 1.990 GHz + 9,116,285,421 instructions # 1.71 insn per cycle + 2.680750400 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1567) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288063388516204 +Relative difference = 3.2588037186351226e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index ee04ec4f60..62e8332824 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:25:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:24:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.848450e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.165587e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.189401e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.336046 sec -INFO: No Floating Point Exceptions have been reported - 791,799,591 cycles:u # 2.270 GHz (74.93%) - 2,269,676 stalled-cycles-frontend:u # 0.29% frontend cycles idle (75.87%) - 6,629,454 stalled-cycles-backend:u # 0.84% backend cycles idle (75.04%) - 
1,529,378,535 instructions:u # 1.93 insn per cycle - # 0.00 stalled cycles per insn (73.60%) - 0.391082759 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.165719e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.725538e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.839606e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 +TOTAL : 0.490916 sec +INFO: No Floating Point Exceptions have been reported + 2,110,795,508 cycles # 2.938 GHz + 3,030,625,876 instructions # 1.44 insn per cycle + 0.775391712 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.988657e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.072972e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.072972e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.652105 sec -INFO: No Floating Point Exceptions have been reported - 12,612,098,802 cycles:u # 3.446 GHz (74.90%) - 7,268,244 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.89%) - 15,208,438 stalled-cycles-backend:u # 0.12% backend cycles idle (74.98%) - 45,478,259,156 instructions:u # 3.61 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 3.664109055 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.990027e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.047358e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.047358e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.362790 sec +INFO: No Floating Point Exceptions have been reported + 16,310,909,453 cycles # 3.038 GHz + 45,362,091,727 instructions # 2.78 insn per cycle + 5.370503759 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP 
precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.304242e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.700465e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.700465e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.808110 sec -INFO: No Floating Point Exceptions have been reported - 6,146,797,671 cycles:u # 3.386 GHz (74.93%) - 6,779,824 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.89%) - 2,584,489,706 stalled-cycles-backend:u # 42.05% backend cycles idle (74.89%) - 17,099,643,260 instructions:u # 2.78 insn per cycle - # 0.15 stalled cycles per insn (74.92%) - 1.819933619 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.603236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.957062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.957062e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.377677 sec +INFO: No Floating Point Exceptions have been reported + 7,152,928,948 cycles # 2.999 GHz + 17,830,970,577 instructions # 2.49 insn per cycle + 2.385771116 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.200088e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.344397e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.344397e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.018174 sec -INFO: No Floating Point Exceptions have been reported - 3,368,462,675 cycles:u # 3.284 GHz (75.11%) - 6,657,313 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.05%) - 1,076,136,635 stalled-cycles-backend:u # 31.95% backend cycles idle (75.04%) - 8,075,374,342 instructions:u # 2.40 insn per cycle - # 0.13 stalled cycles per insn (75.04%) - 1.029920053 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.574095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.769268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.769268e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.318456 sec +INFO: No Floating Point Exceptions have been reported + 3,796,804,907 cycles # 2.864 GHz + 8,300,184,284 instructions # 2.19 insn per cycle + 1.326383790 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.092654e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045479e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045479e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.251317 sec +INFO: No Floating Point Exceptions have been reported + 3,616,269,256 cycles # 2.873 GHz + 7,955,766,878 instructions # 2.20 insn per cycle + 1.259613074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.839534e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.547643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.547643e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.631498 sec +INFO: No Floating Point Exceptions have been reported + 3,329,875,936 cycles # 2.032 GHz + 6,139,934,168 instructions # 1.84 insn per cycle + 1.639821352 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index 4fb6afacf1..630c641b74 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,54 +1,77 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:14:46 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:01:26 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.902290e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.846454e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.846454e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.154218 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,556,835,307 cycles:u # 3.029 GHz (75.15%) - 20,990,140 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.78%) - 1,118,879,516 stalled-cycles-backend:u # 31.46% backend cycles idle (74.86%) - 3,787,419,515 instructions:u # 1.06 insn per cycle - # 0.30 stalled cycles per insn (74.59%) - 1.212173867 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.033781e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.271776e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.271776e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.678665 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,680,382,600 cycles # 2.941 GHz + 4,125,886,335 instructions # 1.54 insn per cycle + 0.969131900 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -56,36 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 
+Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.988122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.072513e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.072513e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.693823 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 12,638,685,291 cycles:u # 3.409 GHz (74.97%) - 7,497,113 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.99%) - 45,528,421 stalled-cycles-backend:u # 0.36% backend cycles idle (74.99%) - 45,589,213,942 instructions:u # 3.61 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 3.711404559 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.992729e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049211e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.049211e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.392675 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 16,447,641,920 cycles # 3.047 GHz + 45,376,165,291 instructions # 2.76 insn per cycle + 5.399694143 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -93,36 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.065974e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.433122e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.433122e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.917484 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 6,448,677,292 cycles:u # 3.340 GHz (74.76%) - 6,354,577 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.80%) - 2,804,322,349 stalled-cycles-backend:u # 43.49% backend cycles idle (75.00%) - 17,249,385,401 instructions:u # 2.67 insn per cycle - # 0.16 stalled cycles per insn (75.14%) - 1.934734505 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.622643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.967470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.967470e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.403008 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 7,299,949,064 cycles # 3.030 GHz + 18,072,622,777 instructions # 2.48 insn per cycle + 2.410009326 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -130,36 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=524288) -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.188263e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.329763e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.329763e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.073007 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,453,359,880 cycles:u # 3.179 GHz (75.02%) - 7,203,128 stalled-cycles-frontend:u # 0.21% frontend cycles idle (74.97%) - 1,092,295,238 stalled-cycles-backend:u # 31.63% backend cycles idle (74.96%) - 8,275,782,953 instructions:u # 2.40 insn per cycle - # 0.13 stalled cycles per insn (75.02%) - 1.091265410 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.349642e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466667e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.466667e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.394511 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,022,324,849 cycles # 2.873 GHz + 8,505,914,761 instructions # 2.11 insn per cycle + 1.400755806 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -167,16 +183,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.999206e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.031817e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031817e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.296911 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,769,931,058 cycles # 2.893 GHz + 8,150,658,922 instructions # 2.16 insn per cycle + 1.303972646 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=524288) +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.810871e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.499560e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.499560e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.673742 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,483,753,004 cycles # 2.073 GHz + 6,352,116,456 instructions # 1.82 insn per cycle + 1.680900164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index 762f16450e..6618ce9254 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:20:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:13:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.588714e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.159655e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.183290e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.080340e+00 +- 3.470037e-03 ) GeV^0 -TOTAL : 1.012466 sec -INFO: No Floating Point Exceptions have been reported - 3,144,227,554 cycles:u # 3.052 GHz (74.39%) - 10,791,235 stalled-cycles-frontend:u # 0.34% frontend cycles idle (74.44%) - 1,121,436,459 stalled-cycles-backend:u # 35.67% backend cycles idle (74.65%) - 2,941,132,864 instructions:u # 0.94 insn per cycle - # 0.38 stalled cycles per insn (74.92%) - 1.068167444 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.987374e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.707237e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.828345e+08 ) sec^-1 
+MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 +TOTAL : 0.574914 sec +INFO: No Floating Point Exceptions have been reported + 2,354,975,975 cycles # 2.955 GHz + 3,428,501,052 instructions # 1.46 insn per cycle + 0.856281449 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.976991e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.060583e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.060583e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.994861e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.050592e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.050592e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.667094 sec -INFO: No Floating Point Exceptions have been reported - 12,641,839,385 cycles:u # 3.441 GHz (74.96%) - 7,496,531 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.96%) - 33,050,827 stalled-cycles-backend:u # 0.26% backend cycles idle (74.96%) - 45,564,942,632 instructions:u # 3.60 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 3.676289637 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 5.393986 sec +INFO: No Floating Point Exceptions have been reported + 16,418,504,516 cycles # 3.041 GHz + 45,362,649,560 instructions # 2.76 insn per cycle + 5.399598972 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = 
VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.292930e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.687941e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.687941e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.811019 sec -INFO: No Floating Point Exceptions have been reported - 6,152,822,392 cycles:u # 3.384 GHz (74.93%) - 6,891,410 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.92%) - 2,580,267,707 stalled-cycles-backend:u # 41.94% backend cycles idle (74.92%) - 17,083,592,107 instructions:u # 2.78 insn per cycle - # 0.15 stalled cycles per insn (74.94%) - 1.820069994 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.530039e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.859076e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.859076e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 +TOTAL : 2.456930 sec +INFO: No Floating Point Exceptions have been reported + 7,301,275,560 cycles # 2.966 GHz + 17,806,613,996 instructions # 2.44 insn per cycle + 2.462297497 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, 
zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.200840e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.345000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.345000e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.018437 sec -INFO: No Floating Point Exceptions have been reported - 3,355,093,185 cycles:u # 3.272 GHz (75.04%) - 6,885,352 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.04%) - 1,079,646,151 stalled-cycles-backend:u # 32.18% backend cycles idle (75.04%) - 8,103,194,689 instructions:u # 2.42 insn per cycle - # 0.13 stalled cycles per insn (75.04%) - 1.027456598 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.656659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.868466e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.868466e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.350339 sec +INFO: No Floating Point Exceptions have been reported + 3,915,528,494 cycles # 2.889 GHz + 8,245,555,563 instructions # 2.11 insn per cycle + 1.356032687 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.182418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.053986e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.053986e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 +TOTAL : 1.281920 sec +INFO: No Floating Point Exceptions have been reported + 3,731,783,402 cycles # 2.900 GHz + 7,862,528,502 instructions # 2.11 insn per cycle + 1.287315829 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.860238e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.561872e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.561872e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 +TOTAL : 1.668691 sec +INFO: No Floating Point Exceptions have been reported + 3,447,157,076 cycles # 2.060 GHz + 6,046,313,937 instructions # 1.75 insn per cycle + 1.674405054 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index d38f0dd075..d009382057 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,50 +1,70 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:18:24 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:07:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.694581e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155961e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.179501e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.079682e+00 +- 3.408341e-03 ) GeV^0 -TOTAL : 1.124710 sec -INFO: No Floating Point Exceptions have been reported - 3,529,678,849 cycles:u # 3.075 GHz (74.28%) - 20,680,935 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.37%) - 1,112,277,947 stalled-cycles-backend:u # 31.51% backend cycles idle (74.44%) - 3,734,266,536 instructions:u # 1.06 insn per cycle - # 0.30 stalled cycles per insn (75.36%) - 1.179583202 seconds 
time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.732740e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.726714e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.848355e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 +TOTAL : 0.621200 sec +INFO: No Floating Point Exceptions have been reported + 2,502,023,855 cycles # 2.967 GHz + 3,885,363,287 instructions # 1.55 insn per cycle + 0.901561261 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -52,34 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.976978e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.060428e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.060428e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.668167 sec -INFO: No Floating Point Exceptions have been reported - 12,646,077,468 cycles:u # 3.441 GHz (74.97%) - 7,141,243 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 15,653,788 stalled-cycles-backend:u # 0.12% backend cycles idle (74.97%) - 45,478,593,220 instructions:u # 3.60 insn per cycle - # 0.00 stalled cycles per insn (74.97%) - 3.677293409 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 638) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.981553e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.037751e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.037751e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.376232 sec +INFO: No Floating Point Exceptions have been reported + 16,248,042,022 cycles # 3.020 GHz + 45,331,416,361 instructions # 2.79 insn per cycle + 5.381836614 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -87,34 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.083713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.528278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.528278e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.882152 sec -INFO: No Floating Point Exceptions have been reported - 6,365,355,019 cycles:u # 3.369 GHz (75.02%) - 6,125,831 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.02%) - 2,586,519,085 stalled-cycles-backend:u # 40.63% backend cycles idle (75.02%) - 17,067,881,993 instructions:u # 2.68 insn per cycle - # 0.15 stalled cycles per insn (75.02%) - 1.891163022 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2899) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.659533e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.006067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.006067e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.336141 sec +INFO: No Floating Point Exceptions have been reported + 7,090,666,725 cycles # 3.029 GHz + 17,790,450,090 instructions # 2.51 insn per cycle + 2.341746280 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3144) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -122,34 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.199327e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.343590e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.343590e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.019173 sec -INFO: No Floating Point Exceptions have been reported - 3,378,128,655 cycles:u # 3.292 GHz (74.87%) - 6,791,969 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.06%) - 1,077,009,683 stalled-cycles-backend:u # 31.88% backend cycles idle (75.06%) - 8,071,602,588 instructions:u # 2.39 insn per cycle - # 0.13 stalled cycles per insn (75.06%) - 1.028260910 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3253) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.679787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.897823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.897823e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.291813 sec +INFO: No Floating Point Exceptions have been reported + 3,744,555,670 cycles # 2.888 GHz + 8,261,514,353 instructions # 2.21 insn per cycle + 1.297385166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3367) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -157,16 +170,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.138641e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050679e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050679e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.232224 sec +INFO: No Floating Point Exceptions have been reported + 3,566,706,619 cycles # 2.883 GHz + 7,912,197,395 instructions # 2.22 insn per cycle + 1.237921630 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3209) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.776715e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.464027e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.464027e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.632182 sec +INFO: No Floating Point Exceptions have been reported + 3,300,564,042 cycles # 2.017 GHz + 6,098,644,443 instructions # 1.85 insn per cycle + 1.637359770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2251) (512y: 22) (512z: 2155) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index 5f0c64fea0..114cd37caa 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:25:10 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:25:08 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.789516e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.145914e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.169020e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.333065 sec -INFO: No Floating Point Exceptions have been reported - 807,156,755 cycles:u # 2.331 GHz (76.13%) - 2,357,253 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.88%) - 6,916,176 stalled-cycles-backend:u # 0.86% backend cycles idle (74.29%) - 1,515,346,659 instructions:u # 1.88 insn per cycle - # 0.00 stalled cycles per insn (75.20%) - 0.388164521 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.148449e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.747307e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868608e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 
3.413389e-03 ) GeV^0 +TOTAL : 0.487780 sec +INFO: No Floating Point Exceptions have been reported + 2,112,765,884 cycles # 2.953 GHz + 3,008,781,494 instructions # 1.42 insn per cycle + 0.773144472 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.995767e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.084272e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.084272e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 3.647183 sec -INFO: No Floating Point Exceptions have been reported - 12,561,245,397 cycles:u # 3.437 GHz (74.90%) - 7,105,600 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.97%) - 1,877,455,833 stalled-cycles-backend:u # 14.95% backend cycles idle (75.05%) - 44,204,929,073 instructions:u # 3.52 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 3.659050401 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 574) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.032943e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.092094e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.092094e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 5.252513 sec +INFO: No Floating Point Exceptions have been reported + 15,985,799,367 cycles # 3.040 GHz + 44,469,540,251 instructions # 2.78 insn per cycle + 5.260076645 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198337657377 -Relative difference = 8.193642726087208e-08 +Avg ME (F77/C++) = 2.0288198669441044 +Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.526162e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.102574e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.102574e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.536647 sec -INFO: No Floating Point Exceptions have been reported - 5,204,363,119 cycles:u # 3.371 GHz (74.69%) - 6,659,030 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.88%) - 1,476,441,909 stalled-cycles-backend:u # 28.37% backend cycles idle (75.13%) - 16,884,742,552 instructions:u # 3.24 insn per cycle - # 0.09 stalled cycles per insn (75.13%) - 1.548405867 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2753) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.499648e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.992066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.992066e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.003668 sec +INFO: No Floating Point Exceptions have been reported + 6,125,955,843 cycles # 3.046 GHz + 17,118,502,582 instructions # 2.79 insn per cycle + 2.011813253 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198775378987 -Relative difference = 6.036124513188701e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193075684831 +Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.932780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.706486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.706486e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.317001 sec -INFO: No Floating Point Exceptions have been reported - 4,431,535,580 cycles:u # 3.346 GHz (74.72%) - 7,766,562 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.98%) - 1,716,277,430 stalled-cycles-backend:u # 38.73% backend cycles idle (75.24%) - 10,221,463,894 instructions:u # 2.31 insn per cycle - # 0.17 stalled cycles per insn (75.24%) - 1.328848484 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3885) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.167880e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.760431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.760431e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.797931 sec +INFO: No Floating Point Exceptions have been reported + 5,167,508,425 cycles # 2.864 GHz + 10,273,109,370 instructions # 1.99 insn per cycle + 1.805362641 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186282850802 -Relative difference = 1.8321738890139266e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.132241e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.737534e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.737534e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.807508 sec +INFO: No Floating Point Exceptions have been reported + 5,031,342,767 cycles # 2.773 GHz + 10,030,466,689 instructions # 1.99 insn per cycle + 1.815492489 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288181869545951 +Relative difference = 9.214951531400725e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.445722e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.755335e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.755335e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 2.460163 sec +INFO: No Floating Point Exceptions have been reported + 4,428,510,644 cycles # 1.795 GHz + 8,482,456,603 instructions # 1.92 insn per cycle + 2.468701093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183148950338 +Relative difference = 1.5521108056421764e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index 828077b7db..0b6cd11934 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:05:37 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:52:53 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.801286e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.142736e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.165759e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.362839 sec -INFO: No Floating Point Exceptions have been reported - 814,301,438 cycles:u # 2.315 GHz (75.31%) - 2,382,034 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.62%) - 8,301,301 stalled-cycles-backend:u # 1.02% backend cycles idle (73.88%) - 1,483,774,354 instructions:u # 1.82 insn per cycle - # 0.01 stalled cycles per insn (75.06%) - 0.419887438 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.102016e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.726185e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.849782e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 
3.413389e-03 ) GeV^0 +TOTAL : 0.483121 sec +INFO: No Floating Point Exceptions have been reported + 2,119,072,326 cycles # 2.979 GHz + 3,036,201,097 instructions # 1.43 insn per cycle + 0.768161183 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.735275e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.868405e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.868405e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.952476 sec -INFO: No Floating Point Exceptions have been reported - 10,149,563,542 cycles:u # 3.428 GHz (74.90%) - 6,976,343 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.89%) - 1,068,495,460 stalled-cycles-backend:u # 10.53% backend cycles idle (74.89%) - 34,540,376,808 instructions:u # 3.40 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 2.965659979 seconds time 
elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 762) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.582380e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.679265e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.679265e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 4.152623 sec +INFO: No Floating Point Exceptions have been reported + 12,621,162,156 cycles # 3.035 GHz + 34,636,169,934 instructions # 2.74 insn per cycle + 4.159998956 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.544250e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.127175e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.127175e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.535257 sec -INFO: No Floating Point Exceptions have been reported - 5,181,760,333 cycles:u # 3.358 GHz (74.93%) - 6,568,469 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.12%) - 1,886,815,713 stalled-cycles-backend:u # 36.41% backend cycles idle (75.12%) - 14,556,262,369 instructions:u # 2.81 insn per cycle - # 0.13 stalled cycles per insn (75.12%) - 1.547415442 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2947) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.435300e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.931883e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.931883e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 2.023526 sec +INFO: No Floating Point Exceptions have been reported + 6,181,207,719 cycles # 3.045 GHz + 14,841,948,094 instructions # 2.40 insn per cycle + 2.030877083 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2975) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198769558221 -Relative difference = 6.06481491495597e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193755550310 +Relative difference = 1.8511017053446366e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.713207e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.063516e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.063516e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.224721 sec -INFO: No Floating Point Exceptions have been reported - 4,075,097,190 cycles:u # 3.307 GHz (74.75%) - 7,173,604 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.74%) - 1,586,521,787 stalled-cycles-backend:u # 38.93% backend cycles idle (74.74%) - 8,954,862,198 instructions:u # 2.20 insn per cycle - # 0.18 stalled cycles per insn (74.92%) - 1.238013991 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4429) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.506636e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.401228e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.401228e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.488171 sec +INFO: No Floating Point Exceptions have been reported + 4,304,268,264 cycles # 2.880 GHz + 9,097,439,075 instructions # 2.11 insn per cycle + 1.495316579 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4456) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186736870557 -Relative difference = 1.6083886449260875e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.617162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.560068e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.560068e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.470806 sec +INFO: No Floating Point Exceptions have been reported + 4,247,597,214 cycles # 2.875 GHz + 8,690,729,651 instructions # 2.05 insn per cycle + 1.478175129 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4233) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182069780305 +Relative difference = 1.0201902325125583e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.756503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.250884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.250884e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.915696 sec +INFO: No Floating Point Exceptions have been reported + 3,876,375,719 cycles # 2.017 GHz + 7,836,694,757 instructions # 2.02 insn per cycle + 1.923109061 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183246739209 +Relative difference = 1.6003107281264138e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index 3386f14e63..99c5f1dd1c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:05:46 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:53:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.781398e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.112844e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.134629e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.078077e+00 +- 3.394918e-03 ) GeV^0 -TOTAL : 0.334605 sec -INFO: No Floating Point Exceptions have been reported - 822,154,607 cycles:u # 2.356 GHz (74.95%) - 2,330,583 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.95%) - 8,335,753 stalled-cycles-backend:u # 1.01% backend cycles idle (75.41%) - 1,482,735,882 instructions:u # 1.80 insn per cycle - # 0.01 stalled cycles per insn (77.15%) - 0.391451760 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.190250e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.721947e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.846420e+08 ) sec^-1 +MeanMatrixElemValue = ( 2.086719e+00 +- 
3.413389e-03 ) GeV^0 +TOTAL : 0.485415 sec +INFO: No Floating Point Exceptions have been reported + 2,076,120,147 cycles # 2.913 GHz + 2,915,349,838 instructions # 1.40 insn per cycle + 0.769560564 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.028815e+00 -Avg ME (F77/GPU) = 2.0288173687877133 -Relative difference = 1.1675720622806321e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.028811e+00 +Avg ME (F77/GPU) = 2.0288499356247485 +Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.993924e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.145465e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.145465e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.770210 sec -INFO: No Floating Point Exceptions have been reported - 9,499,901,218 cycles:u # 3.420 GHz (75.05%) - 6,744,579 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.97%) - 7,175,092 stalled-cycles-backend:u # 0.08% backend cycles idle (74.95%) - 34,567,889,085 instructions:u # 3.64 insn per cycle - # 0.00 stalled cycles per insn (74.95%) - 2.782918776 seconds time elapsed 
-=Symbols in CPPProcess_cpp.o= (~sse4: 434) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.762044e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.875011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.875011e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 +TOTAL : 3.889711 sec +INFO: No Floating Point Exceptions have been reported + 11,863,310,263 cycles # 3.045 GHz + 35,106,472,280 instructions # 2.96 insn per cycle + 3.896935494 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288199088536203 -Relative difference = 4.4925808981097166e-08 +Avg ME (F77/C++) = 2.0288199094356969 +Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.915685e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.551891e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.551891e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079573e+00 +- 3.404713e-03 ) GeV^0 -TOTAL : 1.469689 sec -INFO: No Floating Point Exceptions have been reported - 4,958,077,800 cycles:u # 3.355 GHz (74.94%) - 6,834,456 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.10%) - 1,291,600,051 stalled-cycles-backend:u # 26.05% backend cycles idle (75.10%) - 13,965,595,655 instructions:u # 2.82 insn per cycle - # 0.09 stalled cycles per insn (75.10%) - 1.482492521 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2467) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.629807e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.149090e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.149090e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 +TOTAL : 1.958719 sec +INFO: No Floating Point Exceptions have been reported + 5,974,407,691 cycles # 3.040 GHz + 14,562,989,936 instructions # 2.44 insn per cycle + 1.965935304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2569) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028820e+00 -Avg ME (F77/C++) = 2.0288198892958462 -Relative difference = 5.4565783974899003e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028819e+00 +Avg ME (F77/C++) = 2.0288193583255634 +Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.034991e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.140334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.140334e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.079551e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.157361 sec -INFO: No Floating Point Exceptions have been reported - 3,869,701,142 cycles:u # 3.321 GHz (74.69%) - 7,370,047 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.66%) - 1,428,425,272 stalled-cycles-backend:u # 36.91% backend cycles idle (74.89%) - 8,537,033,922 instructions:u # 2.21 insn per cycle - # 0.17 stalled cycles per insn (75.23%) - 1.169337912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3397) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.627487e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.564550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.564550e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.467639 sec +INFO: No Floating Point Exceptions have been reported + 4,208,313,007 cycles # 2.855 GHz + 8,876,905,434 instructions # 2.11 insn per cycle + 1.474726540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3552) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.028819e+00 -Avg ME (F77/C++) = 2.0288186836987734 -Relative difference = 1.559041129563128e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.625571e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.554690e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.554690e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.468279 sec +INFO: No Floating Point Exceptions have been reported + 4,239,649,829 cycles # 2.876 GHz + 8,443,717,794 instructions # 1.99 insn per cycle + 1.475031334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288182107033208 +Relative difference = 1.0385521077446488e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.780064e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.278902e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.278902e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 +TOTAL : 1.909081 sec +INFO: No Floating Point Exceptions have been reported + 3,835,043,638 cycles # 2.002 GHz + 7,729,492,795 instructions # 2.02 insn per cycle + 1.916628169 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028818e+00 +Avg ME (F77/C++) = 2.0288183204829693 +Relative difference = 1.5796536184903122e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index b4a030267e..6bbdeeb18d 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:25:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:25:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.843910e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.328014e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.346502e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.404179 sec -INFO: No Floating Point Exceptions have been reported - 993,813,076 cycles:u # 2.363 GHz (75.25%) - 2,358,772 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.16%) - 11,114,523 stalled-cycles-backend:u # 1.12% backend cycles idle (73.62%) - 1,620,766,934 instructions:u # 1.63 insn per cycle - # 0.01 stalled cycles per insn (74.22%) - 0.466477700 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.375168e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.358758e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.991650e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 
3.413217e-03 ) GeV^0 +TOTAL : 0.539275 sec +INFO: No Floating Point Exceptions have been reported + 2,197,147,211 cycles # 2.830 GHz + 3,171,133,289 instructions # 1.44 insn per cycle + 0.834260682 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 
-Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.599453e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.665264e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.665264e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.216177 sec -INFO: No Floating Point Exceptions have been reported - 14,448,775,342 cycles:u # 3.418 GHz (74.95%) - 8,614,204 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) - 3,816,067,915 stalled-cycles-backend:u # 26.41% backend cycles idle (75.02%) - 45,665,454,139 instructions:u # 3.16 insn per cycle - # 0.08 stalled cycles per insn (75.02%) - 4.232344682 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.863199e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.911060e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.911060e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.765001 sec +INFO: No Floating Point Exceptions have been reported + 17,514,965,969 cycles # 3.033 GHz + 46,180,069,488 instructions # 2.64 insn per cycle + 5.776213723 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 
2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.292500e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.477313e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.477313e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.621716 sec -INFO: No Floating Point Exceptions have been reported - 8,910,589,768 cycles:u # 3.384 GHz (74.88%) - 7,902,507 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.82%) - 2,747,123,853 stalled-cycles-backend:u # 30.83% backend cycles idle (74.96%) - 27,566,692,372 instructions:u # 3.09 insn per cycle - # 0.10 stalled cycles per insn (75.09%) - 2.637286078 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.331354e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503723e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.503723e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.286290 sec +INFO: No Floating Point Exceptions have been 
reported + 10,049,467,521 cycles # 3.048 GHz + 27,685,234,952 instructions # 2.75 insn per cycle + 3.297791625 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.249154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.909393e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.909393e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.634644 sec -INFO: No Floating Point Exceptions have been reported - 5,421,017,013 cycles:u # 3.293 GHz (74.77%) - 8,322,208 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.81%) - 933,804,845 stalled-cycles-backend:u # 17.23% backend cycles idle (75.05%) - 12,257,868,001 instructions:u # 2.26 insn per cycle - # 0.08 stalled cycles per insn (75.22%) - 1.651139869 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2668) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.194158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.606158e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.606158e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.162271 sec +INFO: No Floating Point Exceptions have been reported + 6,182,412,740 cycles # 2.845 GHz + 12,592,550,468 instructions # 2.04 insn per cycle + 2.174037680 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2773) 
(512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.730742e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.240332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.240332e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 1.970706 sec +INFO: No Floating Point Exceptions have been reported + 5,651,897,158 cycles # 2.853 GHz + 12,026,990,160 instructions # 2.13 insn per cycle + 1.982185993 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2518) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.609905e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.807717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.807717e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.045690 sec +INFO: No Floating Point Exceptions have been reported + 5,750,600,034 cycles # 1.881 GHz + 8,210,466,675 instructions # 1.43 insn per cycle + 3.057406229 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1862) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 5f04e842f2..532bb9e416 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' 
-HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_10:25:32 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:25:54 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.864951e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.362990e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.381998e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.087161e+00 +- 3.410053e-03 ) GeV^0 -TOTAL : 0.403410 sec -INFO: No Floating Point Exceptions have been reported - 1,017,326,099 cycles:u # 2.423 GHz (74.64%) - 2,265,428 stalled-cycles-frontend:u # 0.22% frontend cycles idle (77.25%) - 5,213,166 stalled-cycles-backend:u # 0.51% backend cycles idle (75.81%) 
- 1,577,279,794 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (74.07%) - 0.463037987 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.200313e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.637883e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.154555e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 0.530194 sec +INFO: No Floating Point Exceptions have been reported + 2,265,001,691 cycles # 2.959 GHz + 3,241,984,092 instructions # 1.43 insn per cycle + 0.823101283 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 -Avg ME (F77/GPU) = 2.0288063423243869 -Relative difference = 3.241686434838304e-07 +Avg ME (F77/GPU) = 2.0288063423243874 +Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.600556e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.665048e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.665048e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 4.211548 sec -INFO: No Floating Point Exceptions have been reported - 14,429,603,779 cycles:u # 3.417 GHz (75.00%) - 9,191,990 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 3,203,701,294 stalled-cycles-backend:u # 22.20% backend cycles idle (75.00%) - 44,592,650,458 instructions:u # 3.09 insn per cycle - # 0.07 stalled cycles per insn (75.01%) - 4.227847419 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 590) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.918727e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970297e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970297e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 5.603990 sec +INFO: No Floating Point Exceptions have been reported + 17,066,108,883 cycles # 3.040 GHz + 45,206,022,775 instructions # 2.65 insn per cycle + 5.614933216 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.624886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.841031e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.841031e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.447870 sec -INFO: No Floating Point Exceptions have been reported - 8,253,818,162 cycles:u # 3.356 GHz (74.96%) - 9,137,802 stalled-cycles-frontend:u # 0.11% frontend cycles idle (74.95%) - 1,274,394,716 stalled-cycles-backend:u # 15.44% backend cycles idle (74.98%) - 26,416,039,672 instructions:u # 3.20 insn per cycle - # 0.05 stalled cycles per insn (74.98%) - 2.463520948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2312) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.464266e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.650227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650227e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 3.167234 sec +INFO: No Floating Point Exceptions have been reported + 9,655,586,507 cycles # 3.039 GHz + 26,360,660,752 instructions # 2.73 insn per cycle + 3.178764330 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) 
[cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.491923e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.903415e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.903415e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 1.794681 sec -INFO: No Floating Point Exceptions have been reported - 5,990,026,085 cycles:u # 3.317 GHz (74.86%) - 8,838,657 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.79%) - 1,793,680,422 stalled-cycles-backend:u # 29.94% backend cycles idle (74.80%) - 13,981,160,283 instructions:u # 2.33 insn per cycle - # 0.13 stalled cycles per insn (75.02%) - 1.810757952 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.662113e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.998348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.998348e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.391394 sec +INFO: No Floating Point Exceptions have been reported + 6,882,477,617 cycles # 2.865 GHz + 14,143,328,395 instructions # 2.05 insn per cycle + 2.403055690 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 -Avg ME (F77/C++) = 2.0288063930599014 -Relative difference = 2.9916108265801754e-07 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] 
+EvtsPerSec[Rmb+ME] (23) = ( 4.883189e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.244684e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.244684e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.286437 sec +INFO: No Floating Point Exceptions have been reported + 6,540,751,339 cycles # 2.848 GHz + 13,628,461,172 instructions # 2.08 insn per cycle + 2.297769147 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.798205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.010852e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.010852e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 +TOTAL : 2.903935 sec +INFO: No Floating Point Exceptions have been reported + 5,730,017,108 cycles # 1.966 GHz + 9,320,315,455 instructions # 1.63 insn per cycle + 2.915703363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.028807e+00 +Avg ME (F77/C++) = 2.0288064057068964 +Relative difference = 2.9292737240031234e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 4790fed1f8..2c8152e371 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:25:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:26:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.443417e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.546632e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.548481e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.431395 sec -INFO: No Floating Point Exceptions have been reported - 1,223,660,729 cycles:u # 2.802 GHz (75.64%) - 2,501,735 stalled-cycles-frontend:u # 0.20% frontend cycles idle (76.57%) - 10,246,254 stalled-cycles-backend:u # 0.84% backend cycles idle (75.46%) - 1,631,958,396 instructions:u # 1.33 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 0.483940074 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.471156e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.836503e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.949285e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.478957 sec +INFO: No Floating Point Exceptions have been reported + 1,977,748,469 cycles # 2.835 GHz + 2,830,254,496 instructions # 1.43 insn per cycle + 0.755464456 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.548785e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.673922e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.676381e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.714706 sec -INFO: No Floating Point Exceptions have been reported - 2,077,515,725 cycles:u # 2.828 GHz (74.26%) - 2,536,517 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.13%) - 8,256,797 stalled-cycles-backend:u # 0.40% backend cycles idle (74.50%) - 2,472,016,862 instructions:u # 1.19 insn per cycle - # 0.00 stalled cycles per insn (75.70%) - 
0.775335817 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.039116e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.228066e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.239026e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.611145 sec +INFO: No Floating Point Exceptions have been reported + 2,507,647,227 cycles # 2.935 GHz + 3,822,892,757 instructions # 1.52 insn per cycle + 0.913494944 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.371112e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.386501e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.386501e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.884354 sec -INFO: No Floating Point Exceptions have been reported - 17,047,975,815 cycles:u # 3.488 GHz (74.96%) - 2,450,342 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 3,444,330,788 stalled-cycles-backend:u # 20.20% backend cycles idle (74.96%) - 56,934,701,049 instructions:u # 
3.34 insn per cycle - # 0.06 stalled cycles per insn (74.97%) - 4.892198702 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.499122e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.511257e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.511257e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.576067 sec +INFO: No Floating Point Exceptions have been reported + 19,987,276,024 cycles # 3.038 GHz + 59,914,208,905 instructions # 3.00 insn per cycle + 6.580288357 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.558448e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.616297e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.616297e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.522045 sec -INFO: No Floating Point Exceptions have been reported - 8,801,419,969 cycles:u # 3.486 GHz (75.01%) - 1,985,953 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.97%) - 1,760,948,190 stalled-cycles-backend:u # 20.01% backend cycles idle (74.97%) - 29,935,355,243 instructions:u # 3.40 insn per cycle - # 0.06 stalled cycles per insn (74.97%) - 2.529878750 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.746815e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.790146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.790146e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.470619 sec +INFO: No Floating Point Exceptions have been reported + 10,568,573,836 cycles # 3.042 GHz + 31,084,482,719 instructions # 2.94 insn per cycle + 3.474810942 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.328649e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.353089e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.353089e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.257226 sec -INFO: No Floating Point Exceptions have been reported - 4,393,002,412 cycles:u # 3.486 GHz (74.66%) - 2,099,409 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.66%) - 1,148,579,182 stalled-cycles-backend:u # 26.15% backend cycles idle (74.96%) - 11,105,205,332 instructions:u # 2.53 insn per cycle - # 0.10 stalled cycles per insn (75.23%) - 1.264599738 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.452682e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.618975e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.618975e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.752355 sec +INFO: No Floating Point Exceptions have been reported + 4,998,647,040 cycles # 2.847 GHz + 11,404,728,427 instructions # 2.28 insn per cycle + 1.756553925 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.066971e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.088589e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.088589e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.554927 sec +INFO: No Floating Point Exceptions have been reported + 4,438,094,520 cycles # 2.847 GHz + 10,663,641,043 instructions # 2.40 insn per cycle + 1.559324939 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.520624e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.626785e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.626785e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.200273 sec +INFO: No Floating Point Exceptions have been reported + 4,124,597,483 cycles # 1.872 GHz + 5,971,571,779 instructions # 1.45 insn per cycle + 2.204632407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index ddc33c0955..74c8e6c686 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be 
done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_11:14:57 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:01:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.225611e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.530645e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.530645e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 0.600089 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,780,961,964 cycles:u # 2.990 GHz (74.01%) - 6,588,994 stalled-cycles-frontend:u # 0.37% frontend cycles idle (76.14%) - 279,320,328 stalled-cycles-backend:u # 15.68% backend cycles idle (76.49%) - 2,180,914,415 instructions:u # 1.22 insn per cycle - # 0.13 stalled cycles per insn (75.80%) - 0.651924943 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.545911e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.255095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.255095e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.500354 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,107,574,632 cycles # 2.945 GHz + 3,182,291,906 instructions # 1.51 insn per cycle + 0.772902799 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.811687e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.611689e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.611689e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.948724e+03 +- 1.840727e+03 ) GeV^-2 -TOTAL : 1.363596 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,111,137,981 cycles:u # 2.990 GHz (74.47%) - 16,125,606 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.61%) - 835,955,669 stalled-cycles-backend:u # 20.33% backend cycles idle (74.98%) - 4,214,779,200 instructions:u # 1.03 insn per cycle - # 0.20 stalled cycles per insn (74.98%) - 1.436651722 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.654170e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.373478e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.373478e+06 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.843085 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,201,455,709 cycles # 2.923 GHz + 5,064,301,689 instructions # 1.58 insn per cycle + 1.157821824 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 
256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.374363e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.389785e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.389785e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.883881 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,104,860,522 cycles:u # 3.500 GHz (74.96%) - 2,428,687 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 3,612,152,787 stalled-cycles-backend:u # 21.12% backend cycles idle (74.96%) - 56,962,728,913 instructions:u # 3.33 insn per cycle - # 0.06 stalled cycles per insn (74.96%) - 4.891525423 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1294) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.519976e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.532732e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.532732e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.529594 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,914,538,030 cycles # 3.049 GHz + 59,920,714,356 instructions # 3.01 insn per cycle + 6.534061095 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.582531e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.640749e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.640749e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.517173 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 8,819,312,790 cycles:u # 3.499 GHz (74.95%) - 2,247,704 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.93%) - 1,784,640,885 stalled-cycles-backend:u # 20.24% backend cycles idle (74.93%) - 29,976,004,853 instructions:u # 3.40 insn per cycle - # 0.06 stalled cycles per insn (74.93%) - 2.524864599 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4647) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.734084e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.778629e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778629e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.488369 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,606,558,779 cycles # 3.037 GHz + 31,134,023,580 instructions # 2.94 insn per cycle + 3.492950294 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432429 -Relative difference = 4.4692302371173303e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.328730e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.352996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.352996e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.260923 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,408,283,040 cycles:u # 3.487 GHz (74.70%) - 2,316,722 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.74%) - 1,152,356,331 stalled-cycles-backend:u # 26.14% backend cycles idle (75.06%) - 11,138,060,442 instructions:u # 2.53 insn per cycle - # 0.10 stalled cycles per insn (75.33%) - 1.268502762 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4251) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.451546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.625575e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.625575e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.760502 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 5,040,359,107 cycles # 2.857 GHz + 11,455,585,139 instructions # 2.27 insn per cycle + 1.764980096 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.064061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.085709e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.085709e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.566477 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,478,283,811 cycles # 2.852 GHz + 10,714,144,344 instructions # 2.39 insn per cycle + 1.571016295 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 92) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.519249e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.630304e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.630304e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.208574 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,162,288,033 cycles # 1.882 GHz + 6,009,903,592 instructions # 1.44 insn per cycle + 2.213156087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1620) (512y: 94) (512z: 3577) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index c1e0e45788..2504d6cb2f 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:25:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:26:44 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.465238e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.566482e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.568358e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.418222 sec -INFO: No Floating Point Exceptions have been reported - 1,183,474,852 cycles:u # 2.738 GHz (75.76%) - 2,497,591 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.74%) - 4,803,862 stalled-cycles-backend:u # 0.41% backend cycles idle (75.22%) - 1,692,488,285 instructions:u # 1.43 insn per cycle - # 0.00 stalled cycles per insn (74.30%) - 0.470754483 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.573081e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.880652e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.992912e+06 ) sec^-1 +MeanMatrixElemValue = ( 
1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473448 sec +INFO: No Floating Point Exceptions have been reported + 1,997,107,285 cycles # 2.887 GHz + 2,802,455,481 instructions # 1.40 insn per cycle + 0.748795790 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.554225e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.680598e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.683051e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.710862 sec -INFO: No Floating Point Exceptions have been reported - 2,017,250,246 cycles:u # 2.760 GHz (75.44%) - 2,412,871 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.92%) - 6,852,730 stalled-cycles-backend:u # 0.34% backend cycles idle (75.94%) - 2,437,580,973 instructions:u # 1.21 insn per cycle - # 0.00 stalled cycles per insn (75.64%) - 
0.774920863 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.042916e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.233761e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244311e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.612101 sec +INFO: No Floating Point Exceptions have been reported + 2,523,217,642 cycles # 2.962 GHz + 3,820,710,011 instructions # 1.51 insn per cycle + 0.913471570 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213684418644 -Relative difference = 4.469239991780462e-07 +Avg ME (F77/GPU) = 1.4131213684418649 +Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.535419e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.552435e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.552435e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.658100 sec -INFO: No Floating Point Exceptions have been reported - 16,269,519,657 cycles:u # 3.490 GHz (74.96%) - 2,449,188 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) - 3,788,080,771 stalled-cycles-backend:u # 23.28% backend cycles idle (74.94%) - 56,645,841,981 instructions:u # 
3.48 insn per cycle - # 0.07 stalled cycles per insn (74.93%) - 4.665223880 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 924) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.478144e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.490358e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.490358e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.631814 sec +INFO: No Floating Point Exceptions have been reported + 19,904,693,493 cycles # 3.001 GHz + 60,129,356,320 instructions # 3.02 insn per cycle + 6.635977885 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432427 -Relative difference = 4.4692302386886357e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.323977e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.378429e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.378429e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.614471 sec -INFO: No Floating Point Exceptions have been reported - 9,147,034,129 cycles:u # 3.495 GHz (74.94%) - 2,026,594 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.94%) - 2,648,071,523 stalled-cycles-backend:u # 28.95% backend cycles idle (74.94%) - 30,366,242,847 instructions:u # 3.32 insn per cycle - # 0.09 stalled cycles per insn (74.94%) - 2.621658552 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4697) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.788891e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.832354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.832354e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.440533 sec +INFO: No Floating Point Exceptions have been reported + 10,474,336,033 cycles # 3.041 GHz + 30,686,738,264 instructions # 2.93 insn per cycle + 3.444912048 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684432431 -Relative difference = 4.4692302355460254e-07 +Avg ME (F77/C++) = 1.4131213684432433 +Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.233547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.254406e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.254406e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.351825 sec -INFO: No Floating Point Exceptions have been reported - 4,729,834,556 cycles:u # 3.491 GHz (74.66%) - 1,846,450 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.73%) - 1,487,747,818 stalled-cycles-backend:u # 31.45% backend cycles idle (75.02%) - 11,735,041,331 instructions:u # 2.48 insn per cycle - # 0.13 stalled cycles per insn (75.20%) - 1.358925233 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4465) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.260057e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.421960e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.421960e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.788469 sec +INFO: No Floating Point Exceptions have been reported + 5,127,771,337 cycles # 2.862 GHz + 11,838,347,484 instructions # 2.31 insn per cycle + 1.792570031 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4746) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 -Avg ME (F77/C++) = 1.4131213684416484 -Relative difference = 4.469241520660492e-07 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.006530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.025807e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.025807e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.647024 sec +INFO: No Floating Point Exceptions have been reported + 4,720,484,931 cycles # 2.860 GHz + 11,163,899,176 instructions # 2.36 insn per cycle + 1.651308834 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 246) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416466 +Relative difference = 4.469241533230934e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.518189e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.624521e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.624521e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.200607 sec +INFO: No Floating Point Exceptions have been reported + 4,154,063,919 cycles # 1.885 GHz + 6,222,924,057 instructions # 1.50 insn per cycle + 2.204886027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1516) (512y: 139) (512z: 3679) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213684416484 +Relative difference = 4.469241520660492e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 90704b15e2..e312f04d1e 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:26:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:27:09 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.186904e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.694908e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.703402e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 -TOTAL : 0.357486 sec -INFO: No Floating Point Exceptions have been reported - 949,950,868 cycles:u # 2.571 GHz (74.71%) - 2,550,775 stalled-cycles-frontend:u # 0.27% frontend cycles idle (74.27%) - 5,518,485 stalled-cycles-backend:u # 0.58% backend cycles idle (73.93%) - 1,513,211,065 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (74.28%) - 0.411426189 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.675849e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.049912e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.089991e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 
+- 5.002447e+01 ) GeV^-2 +TOTAL : 0.458226 sec +INFO: No Floating Point Exceptions have been reported + 1,987,161,645 cycles # 2.947 GHz + 2,815,757,381 instructions # 1.42 insn per cycle + 0.732664597 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.009358e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.058985e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.073606e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.519372 sec -INFO: No Floating Point Exceptions have been reported - 1,488,011,322 cycles:u # 2.781 GHz (75.20%) - 2,501,614 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.20%) - 5,177,253 stalled-cycles-backend:u # 0.35% backend cycles idle (74.20%) - 1,942,025,908 instructions:u # 1.31 insn per cycle - # 0.00 stalled cycles per insn (73.48%) - 0.575349084 seconds 
time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.675349e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.381609e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.425889e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.509054 sec +INFO: No Floating Point Exceptions have been reported + 2,180,524,483 cycles # 2.942 GHz + 3,107,964,411 instructions # 1.43 insn per cycle + 0.800068245 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.700255e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.719603e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.719603e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.450045 sec -INFO: No Floating Point Exceptions have been reported - 15,564,829,810 cycles:u # 3.496 GHz (74.94%) - 1,889,922 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.03%) - 2,435,487,135 stalled-cycles-backend:u # 15.65% backend cycles idle (75.03%) - 56,541,733,242 instructions:u # 3.63 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 4.457091697 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.601007e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.614246e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.614246e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.317543 sec +INFO: No Floating Point Exceptions have been reported + 19,251,894,030 cycles # 3.046 GHz + 59,613,754,091 instructions # 3.10 insn per cycle + 6.321648054 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859809517598 -Relative difference = 1.3480841507557613e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.147844e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.166747e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.166747e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.449197 sec -INFO: No Floating Point Exceptions have been reported - 5,067,109,496 cycles:u # 3.490 GHz (74.67%) - 1,374,641 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.81%) - 1,585,658,388 stalled-cycles-backend:u # 31.29% backend cycles idle (75.09%) - 16,235,790,558 instructions:u # 3.20 insn per cycle - # 0.10 stalled cycles per insn (75.21%) - 1.456282420 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.351291e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.489859e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.489859e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.978919 sec +INFO: No Floating Point Exceptions have been reported + 6,013,687,882 cycles # 3.034 GHz + 17,062,971,129 instructions # 2.84 insn per cycle + 1.983047133 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857731430207 -Relative difference = 1.6055147002442227e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.476791e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.563653e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.563653e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.683006 sec -INFO: No Floating Point Exceptions have been reported - 2,394,357,438 cycles:u # 3.491 GHz (74.61%) - 1,766,897 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.36%) - 742,313,096 stalled-cycles-backend:u # 31.00% backend cycles idle (74.44%) - 6,040,131,133 instructions:u # 2.52 insn per cycle - # 0.12 stalled cycles per insn (75.02%) - 0.690178868 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.804689e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.868315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.868315e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.925391 sec +INFO: No Floating Point Exceptions have been reported + 2,640,566,333 cycles # 2.843 GHz + 6,187,446,358 instructions # 2.34 insn per cycle + 0.929575730 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.998130e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.078369e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.078369e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.837375 sec +INFO: No Floating Point Exceptions have been reported + 2,403,180,656 cycles # 2.859 GHz + 5,790,065,517 instructions # 2.41 insn per cycle + 0.841354194 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.523426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.570346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.570346e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.095188 sec +INFO: No Floating Point Exceptions have 
been reported + 2,074,566,855 cycles # 1.888 GHz + 3,391,536,157 instructions # 1.63 insn per cycle + 1.099528954 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt index c796d650cd..316a025050 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_11:15:12 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:02:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.313066e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.769718e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.769718e+06 ) sec^-1 -MeanMatrixElemValue = ( 4.755508e+02 +- 2.671054e+02 ) GeV^-2 -TOTAL : 0.510844 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,532,392,567 cycles:u # 2.908 GHz (75.38%) - 10,395,405 stalled-cycles-frontend:u # 0.68% frontend cycles idle (74.22%) - 255,461,743 stalled-cycles-backend:u # 16.67% backend cycles idle (74.22%) - 1,965,347,850 instructions:u # 1.28 insn per cycle - # 0.13 stalled cycles per insn (73.19%) - 0.559680752 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.524999e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.496444e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.496444e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2 +TOTAL : 0.466645 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,011,613,909 cycles # 2.942 GHz + 2,949,378,989 instructions # 1.47 insn per cycle + 0.740958646 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.573134e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.558732e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.558732e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.855939e+03 +- 1.791987e+03 ) GeV^-2 -TOTAL : 1.126462 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,475,500,716 cycles:u # 3.024 GHz (75.07%) - 29,682,134 stalled-cycles-frontend:u # 0.85% frontend cycles idle (74.64%) - 835,431,380 stalled-cycles-backend:u # 24.04% backend cycles idle (74.68%) - 3,788,788,425 instructions:u # 1.09 insn per cycle - # 0.22 stalled cycles per insn (74.77%) - 1.185344790 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.680079e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.266918e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.266918e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2 +TOTAL : 0.645054 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,639,460,011 cycles # 2.993 GHz + 4,010,655,501 instructions # 1.52 insn per cycle + 0.939491422 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.713043e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.732413e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.732413e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.436399 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,517,029,137 cycles:u # 3.495 GHz (74.95%) - 2,415,118 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.95%) - 2,404,612,148 stalled-cycles-backend:u # 15.50% backend cycles idle (74.99%) - 56,652,779,707 instructions:u # 3.65 insn per cycle - # 0.04 stalled cycles per insn (75.08%) - 4.443610629 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1190) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.574010e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.587324e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.587324e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.387615 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,269,777,585 cycles # 3.015 GHz + 59,617,998,643 instructions # 3.09 insn per cycle + 6.391840570 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 959) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859809517598 -Relative difference = 1.3480841507557613e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.155179e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.174142e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.174142e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.442917 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,052,789,110 cycles:u # 3.495 GHz (74.89%) - 1,393,507 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.10%) - 1,543,898,572 stalled-cycles-backend:u # 30.56% backend cycles idle (75.11%) - 16,257,391,621 instructions:u # 3.22 insn per cycle - # 0.09 stalled cycles per insn (75.11%) - 1.450066534 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.399391e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.540572e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.540572e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.972149 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 6,029,722,967 cycles # 3.052 GHz + 17,109,872,648 instructions # 2.84 insn per cycle + 1.976404451 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129857731430207 -Relative difference = 1.6055147002442227e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 
3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.467698e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.553341e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.553341e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.687943 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,390,771,196 cycles:u # 3.460 GHz (74.53%) - 1,674,608 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.53%) - 739,949,017 stalled-cycles-backend:u # 30.95% backend cycles idle (74.87%) - 6,072,314,919 instructions:u # 2.54 insn per cycle - # 0.12 stalled cycles per insn (75.45%) - 0.695001619 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4734) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.805556e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.869603e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869603e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) 
GeV^-2 +TOTAL : 0.929046 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,658,179,637 cycles # 2.850 GHz + 6,224,135,366 instructions # 2.34 insn per cycle + 0.933362485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.997018e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.074315e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074315e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.841770 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,421,588,452 cycles # 2.865 GHz + 5,827,320,634 instructions # 2.41 insn per cycle + 0.845895734 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4906) (512y: 37) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.537158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.584935e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.584935e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.089934 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,096,708,167 cycles # 1.917 GHz + 3,432,903,656 instructions # 1.64 insn per cycle + 1.094288094 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 37) (512z: 3789) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt index 8ec9721fb6..a72633a312 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:26:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:27:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.331902e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.830401e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.838695e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.415273e+04 +- 1.288237e+04 ) GeV^-2 -TOTAL : 0.354115 sec -INFO: No Floating Point Exceptions have been reported - 934,588,835 cycles:u # 2.542 GHz (76.62%) - 2,338,444 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.53%) - 9,513,633 stalled-cycles-backend:u # 1.02% backend cycles idle (71.64%) - 1,554,664,984 instructions:u # 1.66 insn per cycle - # 0.01 stalled cycles per insn (74.42%) - 0.406943143 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.649129e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.022553e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.063512e+07 ) sec^-1 +MeanMatrixElemValue = ( 1.008472e+02 
+- 5.002447e+01 ) GeV^-2 +TOTAL : 0.453345 sec +INFO: No Floating Point Exceptions have been reported + 1,975,862,611 cycles # 2.945 GHz + 2,757,171,653 instructions # 1.40 insn per cycle + 0.728260674 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.749540e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.744497e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.759194e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.619625e+05 +- 1.611328e+05 ) GeV^-2 -TOTAL : 0.514411 sec -INFO: No Floating Point Exceptions have been reported - 1,416,473,483 cycles:u # 2.667 GHz (75.74%) - 2,369,760 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.50%) - 9,030,285 stalled-cycles-backend:u # 0.64% backend cycles idle (75.36%) - 1,905,808,052 instructions:u # 1.35 insn per cycle - # 0.00 stalled cycles per insn (75.20%) - 0.571806967 seconds 
time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.669823e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.371781e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.417808e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 +TOTAL : 0.507852 sec +INFO: No Floating Point Exceptions have been reported + 2,173,149,896 cycles # 2.944 GHz + 3,150,374,983 instructions # 1.45 insn per cycle + 0.795545558 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.412410e+00 -Avg ME (F77/GPU) = 1.4131674300257941 -Relative difference = 0.0005362678158567296 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.412607e+00 +Avg ME (F77/GPU) = 1.4132214305330990 +Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.764713e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.784565e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.784565e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.724764e+02 +- 2.665343e+02 ) GeV^-2 -TOTAL : 4.373695 sec -INFO: No Floating Point Exceptions have been reported - 15,305,777,282 cycles:u # 3.497 GHz (74.97%) - 1,857,464 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.96%) - 2,679,974,053 stalled-cycles-backend:u # 17.51% backend cycles idle (74.96%) - 56,406,318,615 instructions:u # 3.69 insn per cycle - # 0.05 stalled cycles per insn (74.96%) - 4.380804919 seconds 
time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1124) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.581112e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.594237e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.594237e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 6.365783 sec +INFO: No Floating Point Exceptions have been reported + 19,419,491,454 cycles # 3.049 GHz + 59,350,763,877 instructions # 3.06 insn per cycle + 6.369878540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129859511640177 -Relative difference = 3.456225494743424e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129949096991936 +Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.148887e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168173e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.168173e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.724763e+02 +- 2.665342e+02 ) GeV^-2 -TOTAL : 1.447872 sec -INFO: No Floating Point Exceptions have been reported - 5,058,532,173 cycles:u # 3.487 GHz (74.79%) - 2,559,973 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.07%) - 1,503,389,024 stalled-cycles-backend:u # 29.72% backend cycles idle (75.19%) - 16,330,983,548 instructions:u # 3.23 insn per cycle - # 0.09 stalled cycles per insn (75.19%) - 1.454792272 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 5045) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.722765e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.878130e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.878130e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 +TOTAL : 1.895285 sec +INFO: No Floating Point Exceptions have been reported + 5,768,191,166 cycles # 3.038 GHz + 16,850,391,369 instructions # 2.92 insn per cycle + 1.899458861 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5610) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.412986e+00 -Avg ME (F77/C++) = 1.4129858306637857 -Relative difference = 1.1984281117008586e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.412995e+00 +Avg ME (F77/C++) = 1.4129954647353316 +Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = 
SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.142260e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.206581e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.206581e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.743733e+02 +- 2.676611e+02 ) GeV^-2 -TOTAL : 0.786409 sec -INFO: No Floating Point Exceptions have been reported - 2,741,188,065 cycles:u # 3.473 GHz (74.67%) - 1,874,301 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.67%) - 828,334,786 stalled-cycles-backend:u # 30.22% backend cycles idle (74.67%) - 6,730,777,833 instructions:u # 2.46 insn per cycle - # 0.12 stalled cycles per insn (74.96%) - 0.793478400 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5386) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.566708e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.614620e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.614620e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.063083 sec +INFO: No Floating Point Exceptions have been reported + 3,015,561,521 cycles # 2.827 GHz + 6,848,133,630 instructions # 2.27 insn per cycle + 1.067048166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.413316e+00 -Avg ME (F77/C++) = 1.4133162101620087 -Relative difference = 1.4870135814264702e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.699136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.754996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.754996e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 0.981580 sec +INFO: No Floating Point Exceptions have been reported + 2,791,734,989 cycles # 2.834 GHz + 6,437,581,289 instructions # 2.31 insn per cycle + 0.985661400 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 23) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413313e+00 +Avg ME (F77/C++) = 1.4133132969790267 +Relative difference = 2.1012969292986113e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.392917e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.431841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.431841e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 +TOTAL : 1.195865 sec +INFO: No Floating Point Exceptions have 
been reported + 2,253,891,023 cycles # 1.880 GHz + 3,755,508,897 instructions # 1.67 insn per cycle + 1.200023887 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 28) (512z: 4084) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413316e+00 +Avg ME (F77/C++) = 1.4133164033579249 +Relative difference = 2.85398258307829e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index be15d7acf8..0b1d518f1a 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:26:33 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:27:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.446821e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.550657e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.552436e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.420211 sec -INFO: No Floating Point Exceptions have been reported - 1,217,917,962 cycles:u # 2.805 GHz (75.50%) - 2,572,917 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.70%) - 5,684,122 stalled-cycles-backend:u # 0.47% backend cycles idle (75.40%) - 1,664,013,530 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (75.24%) - 0.472252045 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.453948e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.811550e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.927121e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.473105 sec +INFO: No Floating Point Exceptions have been reported + 2,033,581,083 cycles # 2.945 GHz + 2,886,020,774 instructions # 1.42 insn per cycle + 0.747799818 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.569439e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.691928e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.694396e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.714700 sec -INFO: No Floating Point Exceptions have been reported - 2,021,505,861 cycles:u # 2.755 GHz (75.85%) - 2,526,037 stalled-cycles-frontend:u # 0.12% frontend cycles idle (76.04%) - 10,206,457 stalled-cycles-backend:u # 0.50% backend cycles idle (75.40%) - 2,379,846,608 instructions:u # 1.18 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.779298099 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.031801e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.220510e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231086e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.618243 sec +INFO: No Floating Point Exceptions have been reported + 2,476,239,534 cycles # 2.865 GHz + 3,788,069,315 instructions # 1.53 insn per cycle + 0.921690466 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.367591e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.382854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.382854e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.889428 sec -INFO: No Floating Point Exceptions have been reported - 17,111,064,706 cycles:u # 3.498 GHz (75.00%) - 2,437,133 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 3,998,176,859 stalled-cycles-backend:u # 23.37% backend cycles idle (74.98%) - 57,731,287,493 instructions:u # 3.37 insn per cycle - # 0.07 stalled cycles per insn (74.98%) - 4.896906963 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1219) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.460583e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.472611e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.472611e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.679183 sec +INFO: No Floating Point Exceptions have been reported + 20,182,288,201 cycles # 3.020 GHz + 60,947,365,488 instructions # 3.02 insn per cycle + 6.683352736 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 
10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.454121e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.510241e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.510241e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.562565 sec -INFO: No Floating Point Exceptions have been reported - 8,978,187,022 cycles:u # 3.500 GHz (74.87%) - 395,957 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.00%) - 2,249,240,551 stalled-cycles-backend:u # 25.05% backend cycles idle (75.06%) - 29,645,099,918 instructions:u # 3.30 insn per cycle - # 0.08 stalled cycles per insn (75.06%) - 2.569887817 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4755) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.800189e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.844205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.844205e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.432628 sec +INFO: No Floating Point Exceptions have been reported + 10,469,819,938 cycles # 3.047 GHz + 30,821,820,054 instructions # 2.94 insn per cycle + 3.436918127 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.337143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.361759e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.361759e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.248762 sec -INFO: No Floating Point Exceptions have been reported - 4,378,842,882 cycles:u # 3.498 GHz (74.86%) - 1,908,946 stalled-cycles-frontend:u # 0.04% frontend cycles idle (75.08%) - 1,206,536,527 stalled-cycles-backend:u # 27.55% backend cycles idle (75.08%) - 11,042,976,514 instructions:u # 2.52 insn per cycle - # 0.11 stalled cycles per insn (75.08%) - 1.256226988 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4405) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.488717e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.659662e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.659662e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.746217 sec +INFO: No Floating Point Exceptions have been reported + 4,956,337,420 cycles # 2.833 GHz + 11,358,030,238 instructions # 2.29 insn per cycle + 1.750493549 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.087485e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.109461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.109461e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.526196 sec +INFO: No Floating Point Exceptions have been reported + 4,378,050,988 cycles # 2.862 GHz + 10,608,750,677 instructions # 2.42 insn per cycle + 1.530411654 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 84) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.342670e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.443900e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.443900e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.253273 sec +INFO: No Floating Point Exceptions have been 
reported + 4,230,871,375 cycles # 1.875 GHz + 6,168,087,523 instructions # 1.46 insn per cycle + 2.257413172 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2143) (512y: 116) (512z: 3653) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index dc83255293..e4a40e8315 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-10-04_10:26:47 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:28:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.437038e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.540938e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 
1.542765e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.872208e+03 +- 2.725298e+03 ) GeV^-2 -TOTAL : 0.419292 sec -INFO: No Floating Point Exceptions have been reported - 1,183,272,557 cycles:u # 2.737 GHz (75.83%) - 2,546,572 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.90%) - 5,185,977 stalled-cycles-backend:u # 0.44% backend cycles idle (74.50%) - 1,665,315,705 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (74.52%) - 0.470888876 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.542800e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.917661e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.043581e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 0.476455 sec +INFO: No Floating Point Exceptions have been reported + 2,040,505,669 cycles # 2.943 GHz + 2,877,681,232 instructions # 1.41 insn per cycle + 0.752591733 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.552249e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.674201e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.676662e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.805651e+03 +- 1.746055e+03 ) GeV^-2 -TOTAL : 0.706152 sec -INFO: No Floating Point Exceptions have been reported - 2,033,734,755 cycles:u # 2.796 GHz (76.00%) - 2,524,051 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.21%) - 10,374,444 stalled-cycles-backend:u # 0.51% backend cycles idle (73.94%) - 2,447,976,048 instructions:u # 1.20 insn per cycle - # 0.00 stalled cycles per insn (73.55%) - 0.767744463 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.038811e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.230331e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.241436e+07 ) sec^-1 +MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 +TOTAL : 0.611030 sec +INFO: No Floating Point Exceptions have been reported + 2,506,600,773 cycles # 2.949 GHz + 3,681,760,020 instructions # 1.47 insn per cycle + 0.910379508 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 -Avg ME (F77/GPU) = 1.4131213755569483 -Relative difference = 4.4188898885662695e-07 +Avg ME (F77/GPU) = 1.4131213755569487 +Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.503618e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.520129e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.520129e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 4.700248 sec -INFO: No Floating Point Exceptions have been reported - 16,447,289,759 cycles:u # 3.497 GHz (75.01%) - 2,477,835 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.00%) - 3,270,078,877 stalled-cycles-backend:u # 19.88% backend cycles idle (75.00%) - 57,493,893,321 instructions:u # 3.50 insn per cycle - # 0.06 stalled cycles per insn (75.00%) - 4.710930850 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 866) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.449767e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.461764e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.461764e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 6.708236 sec +INFO: No Floating Point Exceptions have been reported + 20,306,339,981 cycles # 3.026 GHz + 61,171,716,860 instructions # 3.01 insn per cycle + 6.712534448 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 
10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.700429e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.760941e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.760941e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 2.468976 sec -INFO: No Floating Point Exceptions have been reported - 8,641,928,544 cycles:u # 3.496 GHz (74.81%) - 2,103,592 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.92%) - 1,771,646,315 stalled-cycles-backend:u # 20.50% backend cycles idle (75.06%) - 30,122,551,249 instructions:u # 3.49 insn per cycle - # 0.06 stalled cycles per insn (75.08%) - 2.476571876 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4834) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.866725e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.912249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.912249e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 3.385607 sec +INFO: No Floating Point Exceptions have been reported + 10,321,183,247 cycles # 3.045 GHz + 30,532,396,911 instructions # 2.96 insn per cycle + 3.389791787 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.248849e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.270219e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.270219e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.740115e+02 +- 2.671575e+02 ) GeV^-2 -TOTAL : 1.335543 sec -INFO: No Floating Point Exceptions have been reported - 4,669,165,070 cycles:u # 3.488 GHz (74.90%) - 2,234,864 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.90%) - 1,490,886,221 stalled-cycles-backend:u # 31.93% backend cycles idle (74.90%) - 11,673,442,224 instructions:u # 2.50 insn per cycle - # 0.13 stalled cycles per insn (74.90%) - 1.342804791 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4625) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.169860e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.331537e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.331537e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.806172 sec +INFO: No Floating Point Exceptions have been reported + 5,142,039,126 cycles # 2.841 GHz + 11,872,343,877 instructions # 2.31 insn per cycle + 1.810450515 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.017735e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.037222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.037222e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 1.629135 sec +INFO: No Floating Point Exceptions have been reported + 4,678,302,214 cycles # 2.865 GHz + 11,166,912,050 instructions # 2.39 insn per cycle + 1.633419328 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 239) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213600217192 +Relative difference = 4.5288254008796884e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.334630e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.438622e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.438622e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 +TOTAL : 2.255377 sec +INFO: No Floating Point Exceptions have been 
reported + 4,246,914,613 cycles # 1.880 GHz + 6,410,235,153 instructions # 1.51 insn per cycle + 2.259677657 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2039) (512y: 162) (512z: 3731) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.413122e+00 +Avg ME (F77/C++) = 1.4131213786174055 +Relative difference = 4.3972324717191576e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e3e0c6693f..93a6bfaa86 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:27:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:28:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.208150e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259078e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.259226e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.624262 sec -INFO: No Floating Point Exceptions have been reported - 1,851,381,223 cycles:u # 2.964 GHz (74.24%) - 2,899,022 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.80%) - 28,953,261 stalled-cycles-backend:u # 1.56% backend cycles idle (74.16%) - 2,071,880,732 instructions:u # 1.12 insn per cycle - # 0.01 stalled cycles per insn (74.39%) - 0.676502748 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.315412e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.344135e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.346271e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536787 sec +INFO: No Floating Point Exceptions have been reported + 2,272,867,740 cycles # 2.957 GHz + 3,556,184,244 instructions # 1.56 insn per cycle + 0.829093650 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.807571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.813781e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.813898e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.039224 sec -INFO: No Floating Point Exceptions have been reported - 20,636,709,348 cycles:u # 3.405 GHz (75.16%) - 3,160,012 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.05%) - 8,104,094 stalled-cycles-backend:u # 0.04% backend cycles idle (74.82%) - 18,528,863,482 instructions:u # 0.90 insn per cycle - # 0.00 stalled cycles per insn (74.79%) - 6.105402981 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.139015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.169154e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.170337e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.043985 sec +INFO: No Floating Point Exceptions have been reported + 9,922,374,295 cycles # 3.004 GHz + 22,624,836,598 instructions # 2.28 insn per cycle + 3.359970198 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.664747e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.665966e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.665966e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.162519 sec -INFO: No Floating Point Exceptions have been reported - 21,588,585,412 cycles:u # 3.501 GHz (74.97%) - 3,703,442 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) - 3,063,072,888 stalled-cycles-backend:u # 14.19% backend cycles idle (74.96%) - 78,071,257,559 instructions:u # 3.62 insn per cycle - # 0.04 stalled cycles per insn (74.94%) - 6.169930605 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.936959e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937903e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937903e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.473447 sec +INFO: No Floating Point Exceptions have been reported + 25,631,294,284 cycles # 3.024 GHz + 78,955,065,792 instructions # 3.08 insn per cycle + 8.477634665 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.451001e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.456090e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.456090e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.016908 sec -INFO: No Floating Point Exceptions have been reported - 10,568,442,816 cycles:u # 3.500 GHz (74.85%) - 452,444 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.94%) - 1,438,678,493 stalled-cycles-backend:u # 13.61% backend cycles idle (75.06%) - 39,407,284,020 instructions:u # 3.73 insn per cycle - # 0.04 stalled cycles per insn (75.10%) - 3.024966897 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.626289e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.629595e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629595e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.529195 sec +INFO: No Floating Point Exceptions have been reported + 13,151,239,745 cycles # 2.901 GHz + 39,558,608,970 instructions # 3.01 insn per cycle + 4.533411053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.231004e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233582e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.233582e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.340250 sec -INFO: No Floating Point Exceptions have been reported - 4,701,384,029 cycles:u # 3.500 GHz (74.92%) - 1,685,243 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.99%) - 414,413,134 stalled-cycles-backend:u # 8.81% backend cycles idle (74.99%) - 13,815,059,162 instructions:u # 2.94 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 1.348496912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.338008e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.354821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.354821e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.973498 sec +INFO: No Floating Point Exceptions have been reported + 5,607,402,462 cycles # 2.836 GHz + 13,823,390,464 instructions # 2.47 insn per cycle + 1.977813759 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.523267e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.545652e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.545652e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.728657 sec +INFO: No Floating Point Exceptions have been reported + 4,913,666,819 cycles # 2.837 GHz + 12,505,073,837 instructions # 2.54 insn per cycle + 1.733007927 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.360564e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.374844e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.374844e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.234846 sec +INFO: No Floating 
Point Exceptions have been reported + 4,137,413,855 cycles # 1.848 GHz + 6,391,961,816 instructions # 1.54 insn per cycle + 2.239204941 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 5cfdad968d..b5935c9801 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:15:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:02:58 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.222149e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259480e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.259480e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.571352 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,639,028,607 cycles:u # 2.908 GHz (75.21%) - 3,355,204 stalled-cycles-frontend:u # 0.20% frontend cycles idle (74.89%) - 35,988,900 stalled-cycles-backend:u # 2.20% backend cycles idle (75.26%) - 1,957,806,439 instructions:u # 1.19 insn per cycle - # 0.02 stalled cycles per insn (75.89%) - 0.624643467 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.989124e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.283210e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.283210e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.521005 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,237,566,944 cycles # 2.967 GHz + 3,555,564,718 instructions # 1.59 insn per cycle + 0.813310962 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.737936e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.807078e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.807078e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 6.879795 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 23,414,630,300 cycles:u # 3.385 GHz (75.07%) - 39,088,022 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.10%) - 1,100,759,510 stalled-cycles-backend:u # 4.70% backend cycles idle (75.11%) - 20,802,131,066 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (75.02%) - 6.955289687 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.655915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.126232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126232e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.296128 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 10,826,956,183 cycles # 3.021 GHz + 24,051,339,768 instructions # 2.22 insn per cycle + 3.639963445 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.667664e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.668909e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.668909e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.159060 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 21,563,379,147 cycles:u # 3.499 GHz (74.95%) - 855,839 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.95%) - 3,176,265,684 stalled-cycles-backend:u # 14.73% backend cycles idle (74.98%) - 78,148,060,715 instructions:u # 3.62 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 6.167497131 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.953031e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.954015e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.954015e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.407967 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 25,656,461,445 cycles # 3.050 GHz + 78,961,398,849 instructions # 3.08 insn per cycle + 8.412477675 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.470630e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.475832e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.475832e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.009493 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 10,535,457,620 cycles:u # 3.497 GHz (74.99%) - 475,430 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.04%) - 1,392,708,626 stalled-cycles-backend:u # 13.22% backend cycles idle (75.04%) - 39,356,377,208 instructions:u # 3.74 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 3.017223130 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.660154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.664629e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.664629e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.493797 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,079,088,056 cycles # 2.909 GHz + 39,574,928,422 instructions # 3.03 insn per cycle + 4.498177013 seconds time elapsed +=Symbols in 
CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.243155e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.245914e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.245914e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.331271 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,656,550,883 cycles:u # 3.489 GHz (74.86%) - 672,946 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) - 419,855,499 stalled-cycles-backend:u # 9.02% backend cycles idle (74.83%) - 13,812,073,425 instructions:u # 2.97 insn per cycle - # 0.03 stalled cycles per insn (74.83%) - 1.338874604 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.225316e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.242363e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.242363e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.004442 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported 
+ 5,626,816,053 cycles # 2.802 GHz + 13,835,486,332 instructions # 2.46 insn per cycle + 2.009028620 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK 
(relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.559024e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.583873e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.583873e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.726859 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,933,984,591 cycles # 2.851 GHz + 12,515,815,938 instructions # 2.54 insn per cycle + 1.731571167 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.374751e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.389187e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.389187e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.234434 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,161,174,225 cycles # 1.859 GHz + 6,403,903,805 instructions # 1.54 insn per cycle + 2.238967112 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index e0442f707e..8e9f4dbb7f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:20:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:13:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.192547e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.254131e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.254280e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.309339e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337150e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338770e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.545325 sec -INFO: No Floating Point Exceptions have been 
reported - 1,666,928,797 cycles:u # 2.985 GHz (74.33%) - 3,209,148 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.59%) - 34,156,836 stalled-cycles-backend:u # 2.05% backend cycles idle (75.58%) - 1,984,746,124 instructions:u # 1.19 insn per cycle - # 0.02 stalled cycles per insn (74.33%) - 0.591706659 seconds time elapsed +TOTAL : 0.515639 sec +INFO: No Floating Point Exceptions have been reported + 2,211,990,760 cycles # 2.964 GHz + 3,494,673,373 instructions # 1.58 insn per cycle + 0.807662245 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.806735e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814291e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814407e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.142294e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.173330e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.174533e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 6.713214 sec -INFO: No Floating Point Exceptions have been reported - 22,947,093,828 cycles:u # 3.405 GHz (75.02%) - 28,527,633 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.97%) - 1,109,103,473 stalled-cycles-backend:u # 4.83% backend cycles idle (74.95%) - 19,994,352,529 instructions:u # 0.87 insn per cycle - # 0.06 stalled cycles per insn (74.90%) - 6.777028620 seconds time elapsed +TOTAL : 3.131700 sec +INFO: No Floating Point Exceptions have been reported + 10,175,482,357 cycles # 3.002 GHz + 23,150,986,357 instructions # 2.28 insn per cycle + 3.445678001 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.660565e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.662263e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.662263e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.957758e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.958752e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958752e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.176568 sec -INFO: No Floating Point Exceptions have been reported - 21,675,145,023 cycles:u # 3.508 GHz (74.96%) - 1,029,031 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) - 3,346,561,647 stalled-cycles-backend:u # 15.44% backend cycles idle (75.01%) - 78,065,884,281 instructions:u # 3.60 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 6.181564383 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4744) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 8.386452 sec +INFO: No Floating Point Exceptions have been reported + 25,647,894,641 cycles # 3.057 GHz + 78,959,237,985 instructions # 3.08 insn per cycle + 8.390795470 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.471851e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.476956e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.476956e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.631833e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.635219e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.635219e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.004842 sec -INFO: No Floating Point Exceptions have been reported - 10,532,277,206 cycles:u # 3.503 GHz (75.00%) - 522,789 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 1,378,827,302 stalled-cycles-backend:u # 13.09% backend cycles idle (74.99%) - 39,375,118,112 instructions:u # 3.74 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 3.008914073 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +TOTAL : 4.523349 sec +INFO: No Floating Point Exceptions have been reported + 13,074,947,964 cycles # 2.889 GHz + 39,559,504,140 instructions # 3.03 insn per cycle + 4.527544607 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = 
DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.238951e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.241547e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.241547e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.398181e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.415106e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.415106e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.331496 sec -INFO: No Floating Point Exceptions have been reported - 4,653,892,675 cycles:u # 3.491 GHz (74.81%) - 755,875 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.53%) - 416,266,786 stalled-cycles-backend:u # 8.94% backend cycles idle (74.55%) - 13,838,410,994 instructions:u # 2.97 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 1.335537948 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +TOTAL : 1.960799 sec +INFO: No Floating Point Exceptions have been reported + 5,617,485,604 cycles # 2.860 GHz + 13,822,447,933 instructions # 2.46 insn per cycle + 1.965050700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.596236e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.620000e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.620000e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 1.716966 sec +INFO: No Floating Point Exceptions have been reported + 4,918,671,268 cycles # 2.859 GHz + 12,502,910,272 instructions # 2.54 insn per cycle + 1.721169261 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.498633e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.512281e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512281e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 +TOTAL : 2.195425 sec +INFO: No 
Floating Point Exceptions have been reported + 4,134,969,374 cycles # 1.881 GHz + 6,389,980,315 instructions # 1.55 insn per cycle + 2.199787012 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 73b422fb64..3af515fdce 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,69 
+1,86 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:18:35 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:08:10 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.220956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256303e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.256456e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.562105 sec -INFO: No Floating Point Exceptions have been reported - 1,615,560,035 cycles:u # 2.884 GHz (75.64%) - 3,279,106 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.73%) - 38,318,371 stalled-cycles-backend:u # 2.37% backend cycles idle (75.75%) - 2,007,901,935 instructions:u # 1.24 insn per cycle - # 0.02 stalled cycles per insn (75.48%) - 0.612103578 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.060906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.341479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343286e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.519080 sec +INFO: No Floating Point Exceptions have been reported + 2,221,734,414 cycles # 2.960 GHz + 3,514,068,927 instructions # 1.58 insn per cycle + 0.810053031 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.749778e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.814387e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814503e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 6.810345 sec -INFO: No Floating Point Exceptions have been reported - 23,259,531,034 cycles:u # 3.398 GHz (75.09%) - 38,887,134 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.99%) - 1,113,006,856 stalled-cycles-backend:u # 4.79% backend cycles idle (74.95%) - 20,783,979,610 instructions:u # 0.89 insn per cycle - # 0.05 stalled cycles per insn (74.86%) - 6.871836634 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.749279e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.174695e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.175895e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.201027 sec +INFO: No Floating Point Exceptions have been reported + 10,427,032,875 cycles # 3.015 GHz + 22,883,454,671 instructions # 2.19 insn per cycle + 3.514669910 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -71,34 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME 
(F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.671337e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.672629e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.672629e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.148237 sec -INFO: No Floating Point Exceptions have been reported - 21,524,198,011 cycles:u # 3.500 GHz (75.03%) - 871,853 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.97%) - 3,159,080,202 stalled-cycles-backend:u # 14.68% backend cycles idle (74.97%) - 78,121,639,221 instructions:u # 3.63 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 6.152385882 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 
4744) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.951553e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952512e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952512e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.410578 sec +INFO: No Floating Point Exceptions have been reported + 25,641,456,753 cycles # 3.048 GHz + 78,954,490,540 instructions # 3.08 insn per cycle + 8.414704716 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -106,34 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.477126e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.482243e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.482243e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.002162 sec -INFO: No Floating Point Exceptions have been reported - 10,518,742,993 cycles:u # 3.501 GHz (74.97%) - 436,480 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.97%) - 1,374,111,555 stalled-cycles-backend:u # 13.06% backend cycles idle (74.97%) - 39,405,294,105 instructions:u # 3.75 insn per cycle - # 0.03 stalled cycles per insn (74.97%) - 3.006362595 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11946) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.419759e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.422883e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.422883e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.801765 sec +INFO: No Floating Point Exceptions have been reported + 13,757,257,019 cycles # 2.863 GHz + 39,559,580,410 instructions # 2.88 insn per cycle + 4.806002877 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -141,34 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.230963e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233559e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.233559e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.340843 sec -INFO: No Floating Point Exceptions have been reported - 4,682,086,210 cycles:u # 3.486 GHz (74.99%) - 447,842 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 440,082,686 stalled-cycles-backend:u # 9.40% backend cycles idle (74.98%) - 13,804,782,265 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 1.344891707 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10239) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.392232e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.409007e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.409007e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.960333 sec +INFO: No Floating Point Exceptions have been reported + 5,607,404,860 cycles # 2.855 GHz + 13,823,277,017 instructions # 2.47 insn per cycle + 1.964520797 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,16 +186,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.473692e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.495146e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.495146e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.737232 sec +INFO: No Floating Point Exceptions have been reported + 4,913,030,620 cycles # 2.823 GHz + 12,505,111,466 instructions # 2.55 insn per cycle + 1.741396842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 89) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.352701e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.365792e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.365792e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.237312 sec +INFO: No 
Floating Point Exceptions have been reported + 4,145,251,099 cycles # 1.850 GHz + 6,392,502,399 instructions # 1.54 insn per cycle + 2.241587160 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1978) (512y: 101) (512z: 9386) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 7faa487866..296b845e54 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:27:23 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:29:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.215784e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.273497e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.273655e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.541809 sec -INFO: No Floating Point Exceptions have been reported - 1,606,817,035 cycles:u # 2.918 GHz (74.91%) - 2,376,384 stalled-cycles-frontend:u # 0.15% frontend cycles idle (76.08%) - 5,463,411 stalled-cycles-backend:u # 0.34% backend cycles idle (76.60%) - 1,982,702,097 instructions:u # 1.23 insn per cycle - # 0.00 stalled cycles per insn (74.44%) - 0.596494995 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.311659e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.341543e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.343557e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.530710 sec +INFO: No Floating Point Exceptions have been reported + 2,270,985,914 cycles # 2.965 GHz + 3,517,062,690 instructions # 1.55 insn per cycle + 0.822991293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.815220e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821763e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821880e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.026904 sec -INFO: No Floating Point Exceptions have been reported - 20,631,233,989 cycles:u # 3.411 GHz (75.10%) - 3,303,034 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) - 5,924,934 stalled-cycles-backend:u # 0.03% backend cycles idle (74.96%) - 18,496,865,295 instructions:u # 0.90 insn per cycle - # 0.00 stalled cycles per insn (74.89%) - 6.091229642 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.147376e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.178022e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.179287e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.036375 sec +INFO: No Floating Point Exceptions have been reported + 9,886,012,446 cycles # 2.996 GHz + 20,958,419,825 instructions # 2.12 insn per cycle + 3.356479014 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158133E-004 +Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.675758e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.677001e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.677001e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.137224 sec -INFO: No Floating Point Exceptions have been reported - 21,485,025,252 cycles:u # 3.499 GHz (74.99%) - 855,560 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 2,803,835,780 stalled-cycles-backend:u # 13.05% backend cycles idle (74.99%) - 78,078,147,682 instructions:u # 3.63 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 6.144495815 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4695) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.941477e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.942438e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.942438e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.454110 sec +INFO: No Floating Point Exceptions have been reported + 25,600,898,635 cycles # 3.027 GHz + 78,700,147,482 instructions # 3.07 insn per cycle + 8.458308380 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.474541e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.479633e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.479633e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.003606 sec -INFO: No Floating Point Exceptions have been reported - 10,493,041,055 cycles:u # 3.490 GHz (74.99%) - 458,788 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.99%) - 1,391,022,399 stalled-cycles-backend:u # 13.26% backend cycles idle (74.99%) - 39,388,790,006 instructions:u # 3.75 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 3.010970463 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11940) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.685244e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.688800e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.688800e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.456270 sec +INFO: No Floating Point Exceptions have been reported + 13,027,228,689 cycles # 2.921 GHz + 39,448,830,373 instructions # 3.03 insn per cycle + 4.460509331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:12966) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.233223e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.235836e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235836e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.337966 sec -INFO: No Floating Point Exceptions have been reported - 4,669,113,757 cycles:u # 3.482 GHz (74.96%) - 302,720 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.95%) - 559,911,149 stalled-cycles-backend:u # 11.99% backend cycles idle (74.95%) - 13,826,818,213 instructions:u # 2.96 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 1.345263511 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10220) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.659238e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.673263e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.673263e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.147446 sec +INFO: No Floating Point Exceptions have been reported + 6,105,169,365 cycles # 2.838 GHz + 13,911,506,311 instructions # 2.28 insn per cycle + 2.151814673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198157309E-004 -Relative difference = 2.837296636563793e-07 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.414304e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.436030e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.436030e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.748441 sec +INFO: No Floating Point Exceptions have been reported + 4,989,990,459 cycles # 2.848 GHz + 12,602,385,911 instructions # 2.53 insn per cycle + 1.752785329 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 241) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.286007e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.299200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.299200e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.257195 sec +INFO: No Floating 
Point Exceptions have been reported + 4,157,035,910 cycles # 1.839 GHz + 6,500,123,841 instructions # 1.56 insn per cycle + 2.261537219 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1754) (512y: 193) (512z: 9382) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198157320E-004 +Relative difference = 2.837296634927675e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index bead9bc4fd..b2e3af3136 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:05:55 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:53:31 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.204581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.259080e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.259232e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.544267 sec -INFO: No Floating Point Exceptions have been reported - 1,594,544,095 cycles:u # 2.890 GHz (75.38%) - 2,297,817 stalled-cycles-frontend:u # 0.14% frontend cycles idle (75.34%) - 6,864,918 stalled-cycles-backend:u # 0.43% backend cycles idle (75.46%) - 2,035,542,593 instructions:u # 1.28 insn per cycle - # 0.00 stalled cycles per insn (75.47%) - 0.596819706 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.100239e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.122259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.123671e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.538955 sec +INFO: No Floating Point Exceptions have been reported + 2,284,263,136 cycles # 2.966 GHz + 3,551,683,146 instructions # 1.55 insn per cycle + 0.827784044 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.807801e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.813996e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.814112e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.039166 sec -INFO: No Floating Point Exceptions have been reported - 20,672,886,525 cycles:u # 3.409 GHz (74.94%) - 3,556,848 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.08%) - 6,584,175 stalled-cycles-backend:u # 0.03% backend cycles idle (75.14%) - 18,393,856,750 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.12%) - 6.108540721 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.754763e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.780247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.781287e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.308444 sec +INFO: No Floating Point Exceptions have been reported + 10,753,673,387 cycles # 3.016 GHz + 22,598,773,039 instructions # 2.10 insn per cycle + 3.621798315 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.642056e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.642427e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.642427e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 35.337688 sec -INFO: No Floating Point Exceptions have been reported - 123,786,426,168 cycles:u # 3.503 GHz (74.99%) - 32,182,693 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.00%) - 11,797,922,825 stalled-cycles-backend:u # 9.53% backend cycles idle (75.01%) - 141,197,682,575 instructions:u # 1.14 insn per cycle - # 0.08 stalled cycles per insn (75.00%) - 35.345151666 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21379) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.447762e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.448268e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.448268e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 36.881084 sec +INFO: No Floating Point Exceptions have been reported + 112,229,307,455 cycles # 3.043 GHz + 144,790,435,802 instructions # 1.29 insn per cycle + 36.885388068 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.625433e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.627671e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.627671e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.532024 sec -INFO: No Floating Point Exceptions have been reported - 15,876,135,975 cycles:u # 3.501 GHz (74.95%) - 4,561,261 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) - 6,682,795,030 stalled-cycles-backend:u # 42.09% backend cycles idle (74.95%) - 37,517,219,456 instructions:u # 2.36 insn per cycle - # 0.18 stalled cycles per insn (74.96%) - 4.539242499 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68150) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.213545e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.216099e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.216099e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.109796 sec +INFO: No Floating Point Exceptions have been reported + 14,729,625,754 cycles # 2.881 GHz + 37,604,791,196 instructions # 2.55 insn per cycle + 5.114120613 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:68172) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.516587e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.526357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.526357e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.189932 sec -INFO: No Floating Point Exceptions have been reported - 7,653,291,510 cycles:u # 3.490 GHz (74.83%) - 433,752 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) - 4,407,512,563 stalled-cycles-backend:u # 57.59% backend cycles idle (74.91%) - 12,913,139,300 instructions:u # 1.69 insn per cycle - # 0.34 stalled cycles per insn (75.09%) - 2.197244436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46482) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.692100e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.706833e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.706833e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.138452 sec +INFO: No Floating Point Exceptions have been reported + 6,118,049,713 cycles # 2.856 GHz + 13,052,938,667 instructions # 2.13 insn per cycle + 2.142728323 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46946) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.248664e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.270457e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.270457e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.779918 sec +INFO: No Floating Point Exceptions have been reported + 5,070,510,804 cycles # 2.845 GHz + 11,451,450,406 instructions # 2.26 insn per cycle + 1.784180525 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40486) (512y: 285) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.770608e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.785711e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.785711e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.116802 sec +INFO: No Floating 
Point Exceptions have been reported + 3,955,046,373 cycles # 1.865 GHz + 5,927,215,305 instructions # 1.50 insn per cycle + 2.121083388 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39338) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index 6d4b979ef0..567d9226df 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:06:53 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:54:38 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.217021e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.273344e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.273498e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.543903 sec -INFO: No Floating Point Exceptions have been reported - 1,612,633,435 cycles:u # 2.918 GHz (75.12%) - 2,569,790 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.51%) - 8,142,655 stalled-cycles-backend:u # 0.50% backend cycles idle (75.60%) - 2,080,630,465 instructions:u # 1.29 insn per cycle - # 0.00 stalled cycles per insn (75.23%) - 0.592397245 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.114232e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.137301e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.138948e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.536968 sec +INFO: No Floating Point Exceptions have been reported + 2,275,180,937 cycles # 2.958 GHz + 3,539,221,489 instructions # 1.56 insn per cycle + 0.826289591 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.815395e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.821733e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.821850e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.032526 sec -INFO: No Floating Point Exceptions have been reported - 20,680,767,800 cycles:u # 3.411 GHz (74.98%) - 3,381,725 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.06%) - 7,930,374 stalled-cycles-backend:u # 0.04% backend cycles idle (75.08%) - 18,435,488,785 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.04%) - 6.145204538 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.750926e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.776588e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.777633e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.299647 sec +INFO: No Floating Point Exceptions have been reported + 10,717,601,484 cycles # 3.014 GHz + 24,394,837,994 instructions # 2.28 insn per cycle + 3.614900556 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 -Avg ME (F77/GPU) = 6.6266731198158101E-004 -Relative difference = 2.837296517127185e-07 +Avg ME (F77/GPU) = 6.6266731198158122E-004 +Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.607078e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.607445e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.607445e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 35.605652 sec -INFO: No Floating Point Exceptions have been reported - 124,829,217,868 cycles:u # 3.506 GHz (75.00%) - 79,483,257 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 10,477,235,146 stalled-cycles-backend:u # 8.39% backend cycles idle (75.00%) - 140,886,082,991 instructions:u # 1.13 insn per cycle - # 0.07 stalled cycles per insn (75.00%) - 35.616217715 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:21174) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.368481e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.368956e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.368956e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 37.549568 sec +INFO: No Floating Point Exceptions have been reported + 113,756,177,543 cycles # 3.029 GHz + 144,279,233,748 instructions # 1.27 insn per cycle + 37.553893626 seconds time 
elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198140482E-004 -Relative difference = 2.8372991758188064e-07 +Avg ME (F77/C++) = 6.6266731198140450E-004 +Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.559493e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.561660e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.561660e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.615733 sec -INFO: No Floating Point Exceptions have been reported - 16,129,890,206 cycles:u # 3.493 GHz (74.91%) - 3,026,043 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.96%) - 6,217,990,465 stalled-cycles-backend:u # 38.55% backend cycles idle (75.06%) - 37,497,496,126 instructions:u # 2.32 insn per cycle - # 0.17 stalled cycles per insn (75.06%) - 4.622951464 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68049) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.101360e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.103709e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.103709e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 5.293950 sec +INFO: No Floating Point Exceptions have been reported + 15,276,793,173 cycles # 2.885 GHz + 37,839,533,934 instructions # 2.48 insn per cycle + 5.298219477 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:68594) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198141220E-004 -Relative difference = 2.837299064562788e-07 +Avg ME (F77/C++) = 6.6266731198141209E-004 +Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.688066e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.698173e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.698173e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.141376 sec -INFO: No Floating Point Exceptions have been reported - 7,490,478,364 cycles:u # 3.493 GHz (75.01%) - 398,988 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 4,139,339,201 stalled-cycles-backend:u # 55.26% backend cycles idle (75.01%) - 12,775,314,953 instructions:u # 1.71 insn per cycle - # 0.32 stalled cycles per insn (75.01%) - 2.148542351 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:45597) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.769981e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.784911e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.784911e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.116737 sec +INFO: No Floating Point Exceptions have been reported + 5,996,887,243 cycles # 2.829 GHz + 12,920,986,626 instructions # 2.15 insn per cycle + 2.120808857 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:46048) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266731198156778E-004 -Relative difference = 2.837296716733571e-07 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.205151e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.226957e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.226957e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.787893 sec +INFO: No Floating Point Exceptions have been reported + 5,091,257,021 cycles # 2.842 GHz + 11,450,857,319 instructions # 2.25 insn per cycle + 1.792163037 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40151) (512y: 219) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.725567e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.740384e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.740384e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.129337 sec +INFO: No Floating 
Point Exceptions have been reported + 3,958,012,203 cycles # 1.856 GHz + 5,893,673,725 instructions # 1.49 insn per cycle + 2.133623159 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38977) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266731198156789E-004 +Relative difference = 2.837296715097453e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index 5808decd6f..5d514798b3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:27:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:29:49 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.013165e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.166222e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.166578e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.488146 sec -INFO: No Floating Point Exceptions have been reported - 1,415,555,543 cycles:u # 2.855 GHz (75.38%) - 2,516,691 stalled-cycles-frontend:u # 0.18% frontend cycles idle (75.39%) - 5,607,343 stalled-cycles-backend:u # 0.40% backend cycles idle (76.18%) - 1,846,857,501 instructions:u # 1.30 insn per cycle - # 0.00 stalled cycles per insn (75.91%) - 0.543476502 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.483751e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.526267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.530499e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.494153 sec +INFO: No Floating Point Exceptions have been reported + 2,103,124,807 cycles # 2.954 GHz + 3,121,712,472 instructions # 1.48 insn per cycle + 0.773554314 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.941879e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.965090e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.965379e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.641309 sec -INFO: No Floating Point Exceptions have been reported - 12,398,470,985 cycles:u # 3.390 GHz (74.72%) - 2,675,203 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.88%) - 12,468,924 stalled-cycles-backend:u # 0.10% backend cycles idle (74.85%) - 11,363,812,580 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per insn (75.09%) - 3.700528617 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.160066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.222867e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.225655e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.790893 sec +INFO: No Floating Point Exceptions have been reported + 6,074,189,476 cycles # 2.980 GHz + 12,927,595,973 instructions # 2.13 insn per cycle + 2.094579269 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.747827e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.749091e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.749091e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 5.974614 sec -INFO: No Floating Point Exceptions have been reported - 20,938,735,525 cycles:u # 3.503 GHz (74.99%) - 1,421,346 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 2,789,809,039 stalled-cycles-backend:u # 13.32% backend cycles idle (74.97%) - 78,052,866,435 instructions:u # 3.73 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 5.982004691 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.991600e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.992621e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.992621e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.239956 sec +INFO: No Floating Point Exceptions have been reported + 24,920,798,039 cycles # 3.024 GHz + 79,109,177,964 instructions # 3.17 insn per cycle + 8.244226962 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.090926e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092983e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092983e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.509761 sec -INFO: No Floating Point Exceptions have been reported - 5,288,936,507 cycles:u # 3.497 GHz (74.69%) - 217,362 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.83%) - 697,270,102 stalled-cycles-backend:u # 13.18% backend cycles idle (75.08%) - 20,304,183,045 instructions:u # 3.84 insn per cycle - # 0.03 stalled cycles per insn (75.14%) - 1.516786989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.256911e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.270142e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.270142e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.264792 sec +INFO: No Floating Point Exceptions have been reported + 6,533,363,065 cycles # 2.880 GHz + 20,270,541,393 instructions # 3.10 insn per cycle + 
2.268973901 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.410807e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.421078e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.421078e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.686476 sec -INFO: No Floating Point Exceptions have been reported - 2,407,278,996 cycles:u # 3.493 GHz (74.57%) - 915,950 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.47%) - 263,753,591 stalled-cycles-backend:u # 10.96% backend cycles idle (74.37%) - 7,042,386,809 instructions:u # 2.93 insn per cycle - # 0.04 stalled cycles per insn (74.95%) - 0.693448187 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.646998e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.654072e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.654072e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.000797 sec +INFO: No Floating Point Exceptions have been reported + 2,839,215,106 cycles # 2.827 GHz + 7,065,941,238 instructions # 2.49 insn per cycle + 
1.004916383 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.869083e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877796e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877796e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.882438 sec +INFO: No Floating Point Exceptions have been reported + 2,527,237,536 cycles # 2.853 GHz + 6,403,613,133 instructions # 2.53 insn per cycle + 0.886591858 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.495984e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.501538e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.501538e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.101478 sec +INFO: No Floating 
Point Exceptions have been reported + 2,074,107,629 cycles # 1.877 GHz + 3,304,393,311 instructions # 1.59 insn per cycle + 1.105808487 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index 210503fe64..2dfc41840b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:16:01 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:03:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.048713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.154939e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.154939e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 -TOTAL : 0.494255 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,484,258,659 cycles:u # 2.920 GHz (74.03%) - 3,740,150 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.79%) - 37,553,445 stalled-cycles-backend:u # 2.53% backend cycles idle (76.42%) - 1,853,356,007 instructions:u # 1.25 insn per cycle - # 0.02 stalled cycles per insn (75.38%) - 0.544261251 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.941350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.461692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.461692e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.477533 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,051,019,219 cycles # 2.960 GHz + 3,077,913,039 instructions # 1.50 insn per cycle + 0.750579271 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.649670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.949091e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.949091e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 4.451863 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,013,669,983 cycles:u # 3.353 GHz (75.18%) - 39,259,352 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.10%) - 1,108,164,012 stalled-cycles-backend:u # 7.38% backend cycles idle (74.99%) - 13,601,634,469 instructions:u # 0.91 insn per cycle - # 0.08 stalled cycles per insn (74.86%) - 4.516134199 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.966568e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.089944e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.089944e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.964323 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,640,871,467 cycles # 3.008 GHz + 14,013,929,876 instructions # 2.11 insn per cycle + 2.263846286 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.739855e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.741118e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.741118e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 5.994162 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 21,020,909,991 cycles:u # 3.505 GHz (74.84%) - 7,289,869 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.01%) - 2,786,603,340 stalled-cycles-backend:u # 13.26% backend cycles idle (75.05%) - 78,042,375,969 instructions:u # 3.71 insn per cycle - # 0.04 stalled cycles per insn (75.05%) - 6.001397483 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.003416e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.004461e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.004461e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.193798 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 24,914,156,131 cycles # 3.040 GHz + 79,113,283,238 instructions # 3.18 insn per cycle + 8.198127255 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.089785e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.091828e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.091828e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.513689 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 5,299,089,664 cycles:u # 3.494 GHz (74.69%) - 410,422 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.84%) - 720,273,200 stalled-cycles-backend:u # 13.59% backend cycles idle (75.11%) - 20,303,253,905 instructions:u # 3.83 insn per cycle - # 0.04 stalled cycles per insn (75.21%) - 1.521153254 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.268604e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.282277e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.282277e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.263945 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 6,560,498,165 cycles # 2.893 GHz + 20,280,423,064 instructions # 3.09 insn per cycle + 2.268263136 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative 
difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.422716e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.433093e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.433093e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.685519 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,400,647,554 cycles:u # 3.487 GHz (74.51%) - 764,490 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.45%) - 226,385,909 stalled-cycles-backend:u # 9.43% backend cycles idle (74.53%) - 7,042,751,685 instructions:u # 2.93 insn per cycle - # 0.03 stalled cycles per insn (75.09%) - 0.692625436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.650562e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.657776e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.657776e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 
2.367611e+00 ) GeV^-4 +TOTAL : 1.001327 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,847,194,781 cycles # 2.833 GHz + 7,076,285,592 instructions # 2.49 insn per cycle + 1.005550089 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.886394e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.895503e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.895503e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.876996 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,532,131,583 cycles # 2.875 GHz + 6,413,285,430 instructions # 2.53 insn per cycle + 0.881306742 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.496106e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.501711e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.501711e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.104249 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,079,471,281 cycles # 1.877 GHz + 3,314,022,575 instructions # 1.59 insn per cycle + 1.108641897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 30c3c51f0d..f59a43ef84 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand 
(USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:20:44 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:14:13 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.977433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.163711e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.164069e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.205840e-01 +- 3.252482e-01 ) GeV^-4 -TOTAL : 0.486131 sec -INFO: No Floating Point Exceptions have been reported - 1,432,838,430 cycles:u # 2.878 GHz (75.92%) - 2,865,850 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.92%) - 33,010,647 
stalled-cycles-backend:u # 2.30% backend cycles idle (75.92%) - 1,834,730,819 instructions:u # 1.28 insn per cycle - # 0.02 stalled cycles per insn (74.42%) - 0.532007069 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.506269e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.548412e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.552269e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 +TOTAL : 0.473972 sec +INFO: No Floating Point Exceptions have been reported + 2,046,977,318 cycles # 2.972 GHz + 3,047,751,198 instructions # 1.49 insn per cycle + 0.746093011 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.941273e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.966657e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.966945e+05 ) sec^-1 -MeanMatrixElemValue = ( 
1.183835e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 4.316072 sec -INFO: No Floating Point Exceptions have been reported - 14,634,879,974 cycles:u # 3.375 GHz (75.17%) - 28,023,828 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.06%) - 1,080,177,933 stalled-cycles-backend:u # 7.38% backend cycles idle (74.92%) - 12,802,878,043 instructions:u # 0.87 insn per cycle - # 0.08 stalled cycles per insn (74.86%) - 4.373208437 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.132349e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.194879e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.197694e+05 ) sec^-1 +MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 +TOTAL : 1.875001 sec +INFO: No Floating Point Exceptions have been reported + 6,377,015,026 cycles # 3.014 GHz + 13,456,664,964 instructions # 2.11 insn per cycle + 2.175037071 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.743804e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.745174e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.745174e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 5.984050 sec -INFO: No Floating Point Exceptions have been reported - 20,959,056,605 cycles:u # 3.501 GHz (75.01%) - 7,048,916 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.01%) - 2,759,736,387 stalled-cycles-backend:u # 13.17% backend cycles idle (75.01%) - 78,050,009,240 instructions:u # 3.72 insn per cycle - # 0.04 stalled cycles per insn (75.01%) - 5.987947953 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.008641e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.009653e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.009653e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 +TOTAL : 8.170736 sec +INFO: No Floating Point Exceptions have been reported + 24,919,535,959 cycles # 3.049 GHz + 79,107,568,196 instructions # 3.17 insn per cycle + 8.174687518 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.091176e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.093233e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.093233e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.509177 sec -INFO: No Floating Point Exceptions have been reported - 5,293,082,594 cycles:u # 3.503 GHz (74.76%) - 231,968 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.02%) - 702,913,833 stalled-cycles-backend:u # 13.28% backend cycles idle (75.12%) - 20,309,990,457 instructions:u # 3.84 insn per cycle - # 0.03 stalled cycles per insn (75.12%) - 1.512971887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.228176e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.241678e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.241678e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 +TOTAL : 2.274725 sec +INFO: No Floating Point Exceptions have been reported + 6,529,719,760 cycles # 2.866 GHz + 20,269,126,653 instructions # 3.10 insn per cycle + 2.278762144 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.212276e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.223478e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.223478e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.749293 sec -INFO: No Floating Point Exceptions have been reported - 2,618,312,640 cycles:u # 3.483 GHz (74.47%) - 481,078 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.49%) - 274,174,540 stalled-cycles-backend:u # 10.47% backend cycles idle (74.89%) - 7,033,766,117 instructions:u # 2.69 insn per cycle - # 0.04 stalled cycles per insn (75.39%) - 0.753840551 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.543967e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.550020e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.550020e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 1.068855 sec +INFO: No Floating Point Exceptions have been reported + 2,839,565,669 cycles # 2.648 GHz + 7,065,359,777 instructions # 2.49 insn per cycle + 1.073003064 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.860425e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.869459e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.869459e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 +TOTAL : 0.887472 sec +INFO: No Floating Point Exceptions have been reported + 2,533,693,672 cycles # 2.846 GHz + 6,400,193,071 instructions # 2.53 insn per cycle + 0.891520698 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.480335e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.485766e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.485766e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 +TOTAL : 1.114517 sec +INFO: No 
Floating Point Exceptions have been reported + 2,073,817,797 cycles # 1.855 GHz + 3,302,576,002 instructions # 1.59 insn per cycle + 1.118521025 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 3f21b859d4..d51b50aa19 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,69 
+1,86 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:18:58 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:08:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.064587e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.167586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.167925e+05 ) sec^-1 -MeanMatrixElemValue = ( 4.206052e-01 +- 3.252639e-01 ) GeV^-4 -TOTAL : 0.485000 sec -INFO: No Floating Point Exceptions have been reported - 1,450,564,825 cycles:u # 2.892 GHz (74.89%) - 3,384,624 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.13%) - 32,936,362 stalled-cycles-backend:u # 2.27% backend cycles idle (74.69%) - 1,893,341,305 instructions:u # 1.31 insn per cycle - # 0.02 stalled cycles per insn (75.29%) - 0.531131671 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.026858e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.479959e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.483629e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 +TOTAL : 0.478080 sec +INFO: No Floating Point Exceptions have been reported + 2,041,849,266 cycles # 2.949 GHz + 3,029,425,267 instructions # 1.48 insn per cycle + 0.750979183 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst +WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --rmbhst OMP= -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.674766e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.963630e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.963917e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.183967e+02 +- 1.165669e+02 ) GeV^-4 -TOTAL : 4.410855 sec -INFO: No Floating Point Exceptions have been reported - 14,968,323,331 cycles:u # 3.374 GHz (75.17%) - 39,156,540 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.07%) - 1,092,203,690 stalled-cycles-backend:u # 7.30% backend cycles idle (74.77%) - 13,544,332,277 instructions:u # 0.90 insn per cycle - # 0.08 stalled cycles per insn (74.77%) - 4.467521167 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.176974e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 8.225245e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.228004e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 +TOTAL : 1.893219 sec +INFO: No Floating Point Exceptions have been reported + 6,369,671,972 cycles # 2.999 GHz + 13,805,433,323 instructions # 2.17 insn per cycle + 2.180376348 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -71,34 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.737327e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.738674e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.738674e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 5.998189 sec -INFO: No Floating Point Exceptions have been reported - 21,034,970,742 cycles:u # 3.506 GHz (74.94%) - 7,216,384 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) - 2,747,143,202 stalled-cycles-backend:u # 13.06% backend cycles idle (74.99%) - 78,077,897,154 instructions:u # 3.71 insn per cycle - # 0.04 stalled cycles per insn 
(75.05%) - 6.002159436 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2043) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.002985e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.003965e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.003965e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.192685 sec +INFO: No Floating Point Exceptions have been reported + 24,899,500,908 cycles # 3.038 GHz + 79,109,193,695 instructions # 3.18 insn per cycle + 8.196731570 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -106,34 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868816393329E-004 -Relative difference = 1.7859056895059718e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274863312764526E-004 +Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.082396e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.084464e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.084464e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.521644 sec -INFO: No Floating Point Exceptions have been reported - 5,326,757,820 cycles:u # 3.497 GHz (74.82%) - 252,670 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.81%) - 703,654,556 stalled-cycles-backend:u # 13.21% backend cycles idle (74.79%) - 20,327,779,437 instructions:u # 3.82 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 1.525462915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.200812e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.214231e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.214231e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.282348 sec +INFO: No Floating Point Exceptions have been reported + 6,530,583,474 cycles # 2.857 GHz + 20,270,600,320 instructions # 3.10 insn per cycle + 2.286554025 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -141,34 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861442972011E-004 +Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.425817e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.437263e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.437263e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.682637 sec -INFO: No Floating Point Exceptions have been reported - 2,386,005,165 cycles:u # 3.485 GHz (74.16%) - 292,181 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.74%) - 214,651,421 stalled-cycles-backend:u # 9.00% backend cycles idle (75.46%) - 7,027,158,575 instructions:u # 2.95 insn per cycle - # 0.03 stalled cycles per insn (75.46%) - 0.686409709 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10799) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.663107e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.670148e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.670148e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.991015 sec +INFO: No Floating Point Exceptions have been reported + 2,834,464,958 cycles # 2.850 GHz + 7,065,761,630 instructions # 2.49 insn per cycle + 0.995105206 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -176,16 +186,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.873004e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.881673e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.881673e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.880479 sec +INFO: No Floating Point Exceptions have been reported + 2,525,421,644 cycles # 2.857 GHz + 6,403,279,155 instructions # 2.54 insn per cycle + 0.884506369 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11019) (512y: 44) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271938174396888E-004 +Relative difference = 2.7547150614455683e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.474559e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.479875e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.479875e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.117237 sec +INFO: No 
Floating Point Exceptions have been reported + 2,067,196,285 cycles # 1.845 GHz + 3,303,704,117 instructions # 1.60 insn per cycle + 1.121426905 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2603) (512y: 44) (512z: 9605) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952779718007E-004 +Relative difference = 4.194411063934945e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index e26dda0aaa..e59a4c7649 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:28:03 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:30:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.007232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.155231e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.155588e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.478018 sec -INFO: No Floating Point Exceptions have been reported - 1,407,702,810 cycles:u # 2.886 GHz (75.49%) - 2,592,505 stalled-cycles-frontend:u # 0.18% frontend cycles idle (73.44%) - 10,637,742 stalled-cycles-backend:u # 0.76% backend cycles idle (73.52%) - 1,813,846,392 instructions:u # 1.29 insn per cycle - # 0.01 stalled cycles per insn (74.27%) - 0.532596162 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.512381e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.556061e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.560063e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.492452 sec +INFO: No Floating Point Exceptions have been reported + 2,099,626,604 cycles # 2.948 GHz + 3,069,125,723 instructions # 1.46 insn per cycle + 0.769337960 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.016489e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.041051e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.041354e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.547661 sec -INFO: No Floating Point Exceptions have been reported - 12,037,965,362 cycles:u # 3.379 GHz (75.08%) - 2,929,122 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) - 6,308,121 stalled-cycles-backend:u # 0.05% backend cycles idle (74.92%) - 11,034,214,209 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 3.607477945 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.132307e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.195668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.198555e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.801389 sec +INFO: No Floating Point Exceptions have been reported + 6,087,353,843 cycles # 2.992 GHz + 12,902,099,211 instructions # 2.12 insn per cycle + 2.093261081 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262667672387088E-004 +Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.734133e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.735456e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.735456e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.208458e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 6.004874 sec -INFO: No Floating Point Exceptions have been reported - 21,034,046,883 cycles:u # 3.501 GHz (74.99%) - 1,246,412 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.97%) - 2,757,000,098 stalled-cycles-backend:u # 13.11% backend cycles idle (74.97%) - 78,049,772,360 instructions:u # 3.71 insn per cycle - # 0.04 stalled cycles per insn (74.97%) - 6.012706081 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1959) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.002964e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.003993e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.003993e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 +TOTAL : 8.192835 sec +INFO: No Floating Point Exceptions have been reported + 24,924,243,070 cycles # 3.041 GHz + 78,847,605,592 instructions # 3.16 insn per cycle + 8.196950693 
seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 -Avg ME (F77/C++) = 6.6274868874222764E-004 -Relative difference = 1.698648731198014e-08 +Avg ME (F77/C++) = 6.6274866250177339E-004 +Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.086370e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.088395e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.088395e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 1.515750 sec -INFO: No Floating Point Exceptions have been reported - 5,306,409,578 cycles:u # 3.494 GHz (74.72%) - 222,743 stalled-cycles-frontend:u # 0.00% frontend cycles idle (74.76%) - 816,077,120 stalled-cycles-backend:u # 15.38% backend cycles idle (75.00%) - 20,308,592,291 instructions:u # 3.83 insn per cycle - # 0.04 stalled cycles per insn (75.23%) - 1.522939793 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12412) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.423205e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.437587e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.437587e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 +TOTAL : 2.213992 sec +INFO: No Floating Point Exceptions have been reported + 6,479,488,334 cycles # 2.922 GHz + 20,229,540,572 instructions # 3.12 insn per cycle + 2.218146120 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627485e-04 -Avg ME (F77/C++) = 6.6274847398845038E-004 -Relative difference = 3.924799464139408e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627486e-04 +Avg ME (F77/C++) = 6.6274861448331612E-004 +Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.426307e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.436673e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.436673e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.214980e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 0.682233 sec -INFO: No Floating Point Exceptions have been reported - 2,390,357,790 cycles:u # 3.489 GHz (74.41%) - 182,998 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.25%) - 251,809,756 stalled-cycles-backend:u # 10.53% backend cycles idle (74.83%) - 7,021,160,599 instructions:u # 2.94 insn per cycle - # 0.04 stalled cycles per insn (75.48%) - 0.689173465 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10773) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.565281e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.571362e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.571362e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 1.052426 sec +INFO: No Floating Point Exceptions have been reported + 2,984,858,604 cycles # 2.826 GHz + 7,206,634,684 instructions # 2.41 insn per cycle + 1.056645042 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2:12437) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627195e-04 -Avg ME (F77/C++) = 6.6271946993158581E-004 -Relative difference = 4.537125319208525e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.812875e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.821466e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.821466e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 +TOTAL : 0.909433 sec +INFO: No Floating Point Exceptions have been reported + 2,611,310,870 cycles # 2.860 GHz + 6,544,588,321 instructions # 2.51 insn per cycle + 0.913642429 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 27) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627194e-04 +Avg ME (F77/C++) = 6.6271939668088170E-004 +Relative difference = 5.008331292535666e-09 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.437201e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442373e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442373e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 +TOTAL : 1.146158 sec +INFO: No Floating 
Point Exceptions have been reported + 2,140,140,974 cycles # 1.862 GHz + 3,461,558,427 instructions # 1.62 insn per cycle + 1.150379984 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627195e-04 +Avg ME (F77/C++) = 6.6271952032316561E-004 +Relative difference = 3.066631594207157e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index f436c07646..59d4d1fb5f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:07:52 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:55:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.000948e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.160689e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.161056e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.486809 sec -INFO: No Floating Point Exceptions have been reported - 1,377,180,607 cycles:u # 2.793 GHz (75.77%) - 2,398,789 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.40%) - 11,764,538 stalled-cycles-backend:u # 0.85% backend cycles idle (73.74%) - 1,890,847,506 instructions:u # 1.37 insn per cycle - # 0.01 stalled cycles per insn (73.97%) - 0.540106625 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.562021e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.605671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609619e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.491571 sec +INFO: No Floating Point Exceptions have been reported + 2,109,215,463 cycles # 2.972 GHz + 3,151,172,679 instructions # 1.49 insn per cycle + 0.768602284 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.942513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.966205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.966496e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.639339 sec -INFO: No Floating Point Exceptions have been reported - 12,378,328,092 cycles:u # 3.386 GHz (74.81%) - 2,874,435 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.81%) - 5,888,561 stalled-cycles-backend:u # 0.05% backend cycles idle (74.93%) - 11,359,397,040 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 3.695933493 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.602270e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.673827e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.676735e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.733623 sec +INFO: No Floating Point Exceptions have been reported + 5,929,772,785 cycles # 3.016 GHz + 12,569,897,546 instructions # 2.12 insn per cycle + 2.025144690 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 6.090078e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.090698e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.090698e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 26.936506 sec -INFO: No Floating Point Exceptions have been reported - 94,349,632,400 cycles:u # 3.502 GHz (75.00%) - 321,547,192 stalled-cycles-frontend:u # 0.34% frontend cycles idle (75.00%) - 6,053,477,678 stalled-cycles-backend:u # 6.42% backend cycles idle (75.00%) - 132,416,937,199 instructions:u # 1.40 insn per cycle - # 0.05 stalled cycles per insn (75.00%) - 26.943698382 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:17007) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.758295e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759107e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759107e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 28.486353 sec +INFO: No Floating Point Exceptions have been reported + 86,270,016,297 cycles # 3.028 GHz + 135,669,129,169 instructions # 1.57 insn per cycle + 
28.490480934 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275346655336742E-004 -Relative difference = 5.0466172741879477e-08 +Avg ME (F77/C++) = 6.6275349717465765E-004 +Relative difference = 4.26303654465793e-09 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.852540e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.863192e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.863192e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 2.094908 sec -INFO: No Floating Point Exceptions have been reported - 7,335,187,413 cycles:u # 3.497 GHz (74.84%) - 369,790 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.83%) - 3,243,303,148 stalled-cycles-backend:u # 44.22% backend cycles idle (74.86%) - 19,156,715,937 instructions:u # 2.61 insn per cycle - # 0.17 stalled cycles per insn (75.04%) - 2.102051483 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:69115) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.086977e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.099732e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.099732e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.319304 sec +INFO: No Floating Point Exceptions have been reported + 6,773,827,971 cycles # 2.917 GHz + 19,353,970,780 instructions # 2.86 insn per cycle 
+ 2.323538739 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69577) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857190509046E-004 -Relative difference = 4.239150340994169e-08 +Avg ME (F77/C++) = 6.6274862748188362E-004 +Relative difference = 4.14665283800746e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.474649e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.478463e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.478463e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.118789 sec -INFO: No Floating Point Exceptions have been reported - 3,928,092,158 cycles:u # 3.502 GHz (74.85%) - 285,742 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.04%) - 2,218,678,549 stalled-cycles-backend:u # 56.48% backend cycles idle (75.04%) - 6,698,630,896 instructions:u # 1.71 insn per cycle - # 0.33 stalled cycles per insn (75.04%) - 1.125975009 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:48510) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.397177e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.402070e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.402070e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.178879 sec +INFO: No Floating Point Exceptions have been reported + 3,378,583,289 cycles # 2.858 GHz + 6,795,240,952 instructions # 2.01 insn per cycle + 
1.183020517 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49034) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735727803539E-004 -Relative difference = 6.446385744398604e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.787992e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.796171e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.796171e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.922168 sec +INFO: No Floating Point Exceptions have been reported + 2,625,296,482 cycles # 2.836 GHz + 5,970,027,658 instructions # 2.27 insn per cycle + 0.926290404 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42602) (512y: 11) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731568543797E-004 +Relative difference = 2.3668012430631962e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.494711e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.500327e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.500327e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.102442 sec +INFO: No Floating 
Point Exceptions have been reported + 2,067,516,500 cycles # 1.870 GHz + 3,494,858,338 instructions # 1.69 insn per cycle + 1.106623225 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5208) (512y: 3) (512z:44858) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750237027223E-004 +Relative difference = 3.5765412974815996e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt index 9afc98038f..f2c87a7ab9 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_11:08:34 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:56:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.013033e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.163858e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.164212e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.535601e-02 +- 4.279900e-02 ) GeV^-4 -TOTAL : 0.480319 sec -INFO: No Floating Point Exceptions have been reported - 1,415,656,442 cycles:u # 2.887 GHz (75.48%) - 2,488,617 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.52%) - 6,001,546 stalled-cycles-backend:u # 0.42% backend cycles idle (73.61%) - 1,830,372,309 instructions:u # 1.29 insn per cycle - # 0.00 stalled cycles per insn (74.67%) - 0.528836597 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.595159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.631816e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.635791e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 +TOTAL : 0.493140 sec +INFO: No Floating Point Exceptions have been reported + 2,108,192,087 cycles # 2.971 GHz + 3,117,683,956 instructions # 1.48 insn per cycle + 0.768416097 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.005347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.032091e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.032389e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.749227e+01 +- 6.205244e+01 ) GeV^-4 -TOTAL : 3.550404 sec -INFO: No Floating Point Exceptions have been reported - 12,052,442,667 cycles:u # 3.378 GHz (74.95%) - 2,882,292 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.89%) - 6,350,179 stalled-cycles-backend:u # 0.05% backend cycles idle (75.01%) - 11,059,578,171 instructions:u # 0.92 insn per cycle - # 0.00 stalled cycles per insn (75.13%) - 3.609767733 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.676536e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.747415e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.750543e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 +TOTAL : 1.730862 sec +INFO: No Floating Point Exceptions have been reported + 5,933,863,280 cycles # 3.005 GHz + 11,799,586,376 instructions # 1.99 insn per cycle + 2.031002433 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 6.626836e-04 -Avg ME (F77/GPU) = 6.6271025603446138E-004 -Relative difference = 4.022437625032909e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 6.626454e-04 +Avg ME (F77/GPU) = 6.6262669162351490E-004 +Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 
OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.922308e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.922894e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.922894e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.204931e-01 +- 3.252405e-01 ) GeV^-4 -TOTAL : 27.698517 sec -INFO: No Floating Point Exceptions have been reported - 97,024,902,068 cycles:u # 3.503 GHz (74.99%) - 131,608,514 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.99%) - 5,804,063,067 stalled-cycles-backend:u # 5.98% backend cycles idle (74.99%) - 131,693,986,054 instructions:u # 1.36 insn per cycle - # 0.04 stalled cycles per insn (75.00%) - 27.705741729 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:16664) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.806823e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.807635e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.807635e+02 ) sec^-1 +MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 +TOTAL : 28.247116 sec +INFO: No Floating Point Exceptions have been reported + 85,893,515,248 cycles # 3.041 GHz + 135,352,063,458 instructions # 1.58 insn per cycle + 
28.251186288 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 -Avg ME (F77/C++) = 6.6275348053303901E-004 -Relative difference = 2.9372852846917734e-08 +Avg ME (F77/C++) = 6.6275349662128086E-004 +Relative difference = 5.098002770919431e-09 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.243608e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.255433e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.255433e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.211992e-01 +- 3.254573e-01 ) GeV^-4 -TOTAL : 1.996148 sec -INFO: No Floating Point Exceptions have been reported - 6,993,731,379 cycles:u # 3.499 GHz (74.89%) - 958,688 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.79%) - 2,969,879,973 stalled-cycles-backend:u # 42.46% backend cycles idle (74.75%) - 19,159,605,029 instructions:u # 2.74 insn per cycle - # 0.16 stalled cycles per insn (74.95%) - 2.003319478 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:68769) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.048812e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.061380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.061380e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 +TOTAL : 2.331526 sec +INFO: No Floating Point Exceptions have been reported + 6,855,274,765 cycles # 2.936 GHz + 19,472,640,725 instructions # 2.84 insn per cycle 
+ 2.335711915 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:69876) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 -Avg ME (F77/C++) = 6.6274857155746575E-004 -Relative difference = 4.291602312495571e-08 +Avg ME (F77/C++) = 6.6274862799683282E-004 +Relative difference = 4.2243518621014775e-08 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=1] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.443883e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.447559e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.447559e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.211846e-01 +- 3.254638e-01 ) GeV^-4 -TOTAL : 1.142259 sec -INFO: No Floating Point Exceptions have been reported - 3,998,873,840 cycles:u # 3.492 GHz (74.86%) - 52,085,002 stalled-cycles-frontend:u # 1.30% frontend cycles idle (74.85%) - 2,183,378,746 stalled-cycles-backend:u # 54.60% backend cycles idle (74.85%) - 6,643,476,000 instructions:u # 1.66 insn per cycle - # 0.33 stalled cycles per insn (74.85%) - 1.149023989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47334) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.463700e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.469145e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.469145e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.125411 sec +INFO: No Floating Point Exceptions have been reported + 3,100,011,361 cycles # 2.746 GHz + 6,715,084,131 instructions # 2.17 insn per cycle + 
1.129564678 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47692) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 6.627274e-04 -Avg ME (F77/C++) = 6.6272735712090414E-004 -Relative difference = 6.470095531024898e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.701785e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.709182e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.709182e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 0.968805 sec +INFO: No Floating Point Exceptions have been reported + 2,625,966,040 cycles # 2.701 GHz + 5,966,391,975 instructions # 2.27 insn per cycle + 0.972890407 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41858) (512y: 13) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627273e-04 +Avg ME (F77/C++) = 6.6272731623419345E-004 +Relative difference = 2.449603850635964e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.484080e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.489679e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.489679e+04 ) sec^-1 +MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 +TOTAL : 1.110163 sec +INFO: No Floating 
Point Exceptions have been reported + 2,071,498,058 cycles # 1.861 GHz + 3,487,792,468 instructions # 1.68 insn per cycle + 1.114282581 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44494) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.627275e-04 +Avg ME (F77/C++) = 6.6272750247886592E-004 +Relative difference = 3.740400032174438e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index 33cd2d7259..97e6470827 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:28:20 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:30:41 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.195655e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256284e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.256437e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.582310 sec -INFO: No Floating Point Exceptions have been reported - 1,548,799,561 cycles:u # 2.707 GHz (75.20%) - 2,635,387 stalled-cycles-frontend:u # 0.17% frontend cycles idle (75.15%) - 7,844,789 stalled-cycles-backend:u # 0.51% backend cycles idle (76.11%) - 2,004,827,952 instructions:u # 1.29 insn per cycle - # 0.00 stalled cycles per insn (74.67%) - 0.637286518 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.316539e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.346233e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.348408e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.533376 sec +INFO: No Floating Point Exceptions have been reported + 2,273,630,859 cycles # 2.959 GHz + 3,530,304,224 instructions # 1.55 insn per cycle + 0.826605443 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.797960e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.804166e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.804281e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.074389 sec -INFO: No Floating Point Exceptions have been reported - 19,351,372,698 cycles:u # 3.173 GHz (75.09%) - 3,194,528 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.99%) - 5,306,036 stalled-cycles-backend:u # 0.03% backend cycles idle (74.98%) - 17,348,901,637 instructions:u # 0.90 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 6.141023183 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.119929e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.150275e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.151562e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.050268 sec +INFO: No Floating Point Exceptions have been reported + 9,709,254,510 cycles # 2.935 GHz + 13,370,261,279 instructions # 1.38 insn per cycle + 3.367751590 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.561544e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.562742e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.562742e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.411011 sec -INFO: No Floating Point Exceptions have been reported - 21,808,026,475 cycles:u # 3.400 GHz (74.92%) - 1,498,697 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.98%) - 2,731,850,634 stalled-cycles-backend:u # 12.53% backend cycles idle (75.05%) - 78,797,940,588 instructions:u # 3.61 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 6.418516328 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4817) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.915345e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.916261e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.916261e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.569018 sec +INFO: No Floating Point Exceptions have been reported + 25,934,368,405 cycles # 3.026 GHz + 79,430,143,870 instructions # 3.06 insn per cycle + 8.573244716 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.541327e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.546601e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.546601e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.967715 sec -INFO: No Floating Point Exceptions have been reported - 10,392,967,476 cycles:u # 3.499 GHz (74.96%) - 3,206,515 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.96%) - 1,346,372,943 stalled-cycles-backend:u # 12.95% backend cycles idle (74.96%) - 38,655,309,883 instructions:u # 3.72 insn per cycle - # 0.03 stalled cycles per insn (74.96%) - 2.974858471 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:12020) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.634190e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.637434e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.637434e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.519341 sec +INFO: No Floating Point Exceptions have been reported + 12,845,450,280 cycles # 2.841 GHz + 38,825,374,620 instructions # 3.02 insn per cycle + 4.523658769 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:13173) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.223037e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225584e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.225584e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.349009 sec -INFO: No Floating Point Exceptions have been reported - 4,726,511,869 cycles:u # 3.496 GHz (74.83%) - 2,315,884 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.15%) - 463,423,377 stalled-cycles-backend:u # 9.80% backend cycles idle (75.15%) - 13,596,968,035 instructions:u # 2.88 insn per cycle - # 0.03 stalled cycles per insn (75.15%) - 1.356861285 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10261) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.419852e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.436995e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.436995e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.954378 sec +INFO: No Floating Point Exceptions have been reported + 5,613,587,439 cycles # 2.867 GHz + 13,617,535,847 instructions # 2.43 insn per cycle + 1.958653443 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.634198e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.657060e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.657060e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.708774 sec +INFO: No Floating Point Exceptions have been reported + 4,864,533,016 cycles # 2.841 GHz + 12,296,957,793 instructions # 2.53 insn per cycle + 1.713075276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 80) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.360180e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.374428e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.374428e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.234665 sec +INFO: No Floating 
Point Exceptions have been reported + 4,169,044,558 cycles # 1.863 GHz + 6,391,574,666 instructions # 1.53 insn per cycle + 2.238987087 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1983) (512y: 92) (512z: 9360) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt index dd054f4226..e533cb8a65 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-10-04_10:28:43 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:31:14 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.219819e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.274835e+05 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 1.274989e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.804675e-02 +- 2.047289e-02 ) GeV^-4 -TOTAL : 0.538450 sec -INFO: No Floating Point Exceptions have been reported - 1,578,538,660 cycles:u # 2.872 GHz (75.85%) - 2,510,341 stalled-cycles-frontend:u # 0.16% frontend cycles idle (76.14%) - 6,851,856 stalled-cycles-backend:u # 0.43% backend cycles idle (75.54%) - 2,042,648,852 instructions:u # 1.29 insn per cycle - # 0.00 stalled cycles per insn (75.16%) - 0.592143001 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.333573e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.363743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365714e+05 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 0.533533 sec +INFO: No Floating Point Exceptions have been reported + 2,265,915,416 cycles # 2.955 GHz + 3,527,237,824 instructions # 1.56 insn per cycle + 0.825201688 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.812105e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.818586e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.818703e+05 ) sec^-1 -MeanMatrixElemValue = ( 6.694853e+01 +- 6.364791e+01 ) GeV^-4 -TOTAL : 6.030619 sec -INFO: No Floating Point Exceptions have been reported - 20,676,199,911 cycles:u # 3.416 GHz (74.93%) - 3,297,259 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.93%) - 7,355,442 stalled-cycles-backend:u # 0.04% backend cycles idle (74.91%) - 18,492,665,928 instructions:u # 0.89 insn per cycle - # 0.00 stalled cycles per insn (75.11%) - 6.094301410 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.131054e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163156e+05 ) sec^-1 +MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 +TOTAL : 3.042026 sec +INFO: No Floating Point Exceptions have been reported + 9,721,344,649 cycles # 2.947 GHz + 14,284,197,890 instructions # 1.47 insn per cycle + 3.359293537 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.668544e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.669760e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.669760e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 6.153508 sec -INFO: No Floating Point Exceptions have been reported - 21,554,539,794 cycles:u # 3.501 GHz (74.99%) - 884,199 stalled-cycles-frontend:u # 0.00% frontend cycles idle (75.04%) - 2,816,905,461 stalled-cycles-backend:u # 13.07% backend cycles idle (74.99%) - 78,855,686,322 instructions:u # 3.66 insn per cycle - # 0.04 stalled cycles per insn (74.99%) - 6.161032029 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 4763) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.920229e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.921140e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.921140e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 8.546949 sec +INFO: No Floating Point Exceptions have been reported + 25,998,282,864 cycles # 3.041 GHz + 79,450,746,897 instructions # 3.06 insn per cycle + 8.551213538 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.429343e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.434335e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.434335e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 3.028366 sec -INFO: No Floating Point Exceptions have been reported - 10,615,169,652 cycles:u # 3.502 GHz (74.93%) - 4,111,125 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.93%) - 1,389,155,199 stalled-cycles-backend:u # 13.09% backend cycles idle (74.93%) - 38,676,034,023 instructions:u # 3.64 insn per cycle - # 0.04 stalled cycles per insn (74.92%) - 3.036335947 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:11990) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.656713e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.660030e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.660030e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 4.491295 sec +INFO: No Floating Point Exceptions have been reported + 12,816,709,585 cycles # 2.852 GHz + 38,780,987,144 instructions # 3.03 insn per cycle + 4.495553287 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 
OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.229287e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.231865e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.231865e+04 ) sec^-1 -MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.342031 sec -INFO: No Floating Point Exceptions have been reported - 4,712,479,104 cycles:u # 3.504 GHz (74.90%) - 2,278,012 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.02%) - 444,621,609 stalled-cycles-backend:u # 9.43% backend cycles idle (75.02%) - 13,604,129,685 instructions:u # 2.89 insn per cycle - # 0.03 stalled cycles per insn (75.02%) - 1.349775578 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10235) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.232154e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.248832e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.248832e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.998281 sec +INFO: No Floating Point Exceptions have been reported + 5,587,815,925 cycles # 2.792 GHz + 13,730,785,401 instructions # 2.46 insn per cycle + 2.002499994 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 -Avg ME (F77/C++) = 6.6266730409276836E-004 -Relative difference = 2.9563428359824236e-07 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.273072e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.294230e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.294230e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 1.774969 sec +INFO: No Floating Point Exceptions have been reported + 4,961,155,724 cycles # 2.790 GHz + 12,423,809,903 instructions # 2.50 insn per cycle + 1.779214057 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.260898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.274229e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.274229e+03 ) sec^-1 +MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 +TOTAL : 2.266066 sec +INFO: No Floating 
Point Exceptions have been reported + 4,182,312,406 cycles # 1.843 GHz + 6,495,020,499 instructions # 1.55 insn per cycle + 2.270352700 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1806) (512y: 190) (512z: 9358) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 6.626675e-04 +Avg ME (F77/C++) = 6.6266730409276857E-004 +Relative difference = 2.956342832710188e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index a754646936..58a216130e 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,13 +1,13 @@ -Building in 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:29:43 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:33:06 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.059066e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059482e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059641e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.448019 sec +INFO: No Floating Point Exceptions have been reported + 8,346,552,119 cycles # 3.010 GHz + 17,505,316,851 instructions # 2.10 insn per cycle + 2.833264459 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.249682e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.251806e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.252033e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.012957 sec +INFO: No Floating Point Exceptions have been reported + 13,135,921,613 cycles # 3.025 GHz + 31,141,588,241 instructions # 2.37 insn per cycle + 4.400245474 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.197107e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.197160e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.197160e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.431348 sec -INFO: No Floating Point Exceptions have been reported - 15,441,098,907 cycles:u # 3.496 GHz (75.01%) - 9,894,890 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.00%) - 1,614,181,861 stalled-cycles-backend:u # 10.45% backend cycles idle (75.00%) - 53,530,475,903 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.00%) - 4.438636757 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.899243e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.899462e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.899462e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.685169 sec +INFO: No Floating Point Exceptions have been reported + 18,964,432,627 cycles # 2.836 GHz + 53,903,774,133 instructions # 2.84 insn per cycle + 6.689349528 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.340376e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.340512e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.340512e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.258121 sec -INFO: No Floating Point Exceptions have been reported - 7,906,059,909 cycles:u # 3.497 GHz (74.88%) - 1,356,724 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.88%) - 767,966,259 stalled-cycles-backend:u # 9.71% backend cycles idle (74.90%) - 27,078,328,956 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 2.265122330 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.626145e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.626234e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.626234e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.249075 sec +INFO: No Floating Point Exceptions have been reported + 9,790,241,271 cycles # 3.010 GHz + 27,152,279,760 instructions # 2.77 insn per cycle + 3.253283773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.201588e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.202076e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.202076e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.017046 sec -INFO: No Floating Point Exceptions have been reported - 3,564,414,743 cycles:u # 3.495 GHz (74.97%) - 1,108,073 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.90%) - 310,098,984 stalled-cycles-backend:u # 8.70% backend cycles idle (74.90%) - 9,561,959,007 instructions:u # 2.68 insn per cycle - # 0.03 stalled cycles per insn (74.90%) - 1.024814784 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.533274e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.533700e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.533700e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.496587 sec +INFO: No Floating Point Exceptions have been reported + 4,263,425,533 cycles # 2.842 GHz + 9,591,372,936 instructions # 2.25 insn per cycle + 1.500755370 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.966938e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.967470e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.967470e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.332801 sec +INFO: No Floating Point Exceptions have been reported + 3,736,922,615 cycles # 2.796 GHz + 8,515,084,014 instructions # 2.28 insn per cycle + 1.337097137 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.547498e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.548061e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.548061e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.490279 sec +INFO: 
No Floating Point Exceptions have been reported + 2,700,551,857 cycles # 1.808 GHz + 4,281,722,844 instructions # 1.59 insn per cycle + 1.494618048 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 1ca1764591..1615b7402d 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_11:16:19 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_23:03:58 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.055259e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.057350e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.057350e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.372375 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 8,116,434,360 cycles # 3.010 GHz + 18,416,481,934 instructions # 2.27 insn per cycle + 2.753979421 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.189805e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.222017e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.222017e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.994979 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 13,081,625,338 cycles # 3.026 GHz + 28,387,877,176 instructions # 2.17 insn per cycle + 4.377406416 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.202290e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.202327e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.202327e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.394959 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 15,395,630,820 cycles:u # 3.501 GHz (74.90%) - 7,691,352 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.94%) - 1,587,810,411 stalled-cycles-backend:u # 10.31% backend cycles idle (75.03%) - 53,478,307,867 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 4.402757479 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.186410e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.186644e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.186644e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.462059 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 19,169,468,026 cycles # 2.965 GHz + 53,903,983,718 instructions # 2.81 insn per cycle + 6.466524182 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program 
aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.348643e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.348774e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.348774e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.250266 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 7,882,659,724 cycles:u # 3.498 GHz (74.82%) - 2,291,383 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.76%) - 810,603,743 stalled-cycles-backend:u # 10.28% backend cycles idle (74.94%) - 27,087,969,672 instructions:u # 3.44 insn per cycle - # 0.03 stalled cycles per insn (75.15%) - 2.258094292 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95842) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.623131e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.623222e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.623222e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.254596 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 9,880,100,026 cycles # 3.033 GHz + 27,153,310,266 instructions # 2.75 insn per cycle + 3.259041098 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.138759e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.139242e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.139242e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.029966 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,604,197,914 cycles:u # 3.489 GHz (74.55%) - 1,430,799 stalled-cycles-frontend:u # 0.04% frontend cycles idle (74.92%) - 303,526,117 stalled-cycles-backend:u # 8.42% backend cycles idle (75.22%) - 9,570,463,697 instructions:u # 2.66 insn per cycle - # 0.03 stalled cycles per insn (75.22%) - 1.037411100 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83781) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.505113e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505536e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505536e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.508139 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,272,653,512 cycles # 2.826 GHz + 9,594,202,047 instructions # 2.25 insn per cycle + 1.512512017 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.983827e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.984375e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.984375e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.327337 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 3,731,860,346 cycles # 2.803 GHz + 8,517,006,189 instructions # 2.28 insn per cycle + 1.331804367 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 90) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.634471e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.635161e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.635161e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.454363 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,703,496,141 cycles # 1.854 GHz + 4,284,293,846 instructions # 1.58 insn per cycle + 1.458845276 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2856) (512y: 102) (512z:79114) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt index 52d5d80fe7..3a68912814 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:30:20 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:34:32 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.055952e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.056442e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.056602e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 2.449389 sec +INFO: No Floating Point Exceptions have been reported + 8,348,082,530 cycles # 3.004 GHz + 16,524,233,578 instructions # 1.98 insn per cycle + 2.837366535 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 9.258307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.260215e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.260440e+03 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 4.014474 sec +INFO: No Floating Point Exceptions have been reported + 13,153,845,841 cycles # 3.028 GHz + 31,087,113,730 instructions # 2.36 insn per cycle + 4.401303970 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722595284406640E-003 +Relative difference = 3.5164777671934515e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.182021e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.182059e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.182059e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.468565 sec -INFO: No Floating Point Exceptions have been reported - 15,634,721,907 cycles:u # 3.497 GHz (74.95%) - 5,085,110 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.95%) - 1,647,500,835 stalled-cycles-backend:u # 10.54% backend cycles idle (74.95%) - 53,473,632,621 instructions:u # 3.42 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.476299042 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44484) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.940699e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.940944e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.940944e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.640802 sec +INFO: No Floating Point Exceptions have been reported + 18,841,020,722 cycles # 2.836 GHz + 53,933,535,215 instructions # 2.86 insn per cycle + 6.644982679 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.347167e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.347309e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.347309e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.251744 sec -INFO: No Floating Point Exceptions have been reported - 7,882,137,396 cycles:u # 3.496 GHz (74.81%) - 15,287,325 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.82%) - 758,625,123 stalled-cycles-backend:u # 9.62% backend cycles idle (74.97%) - 27,083,240,161 instructions:u # 3.44 insn per cycle - # 0.03 stalled cycles per insn (75.13%) - 2.258698525 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95581) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.601269e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.601355e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.601355e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 3.298877 sec +INFO: No Floating Point Exceptions have been reported + 9,967,394,924 cycles # 3.018 GHz + 27,130,116,099 instructions # 2.72 insn per cycle + 3.303134949 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.209504e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.209984e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.209984e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.015075 sec -INFO: No Floating Point Exceptions have been reported - 3,546,929,063 cycles:u # 3.484 GHz (74.87%) - 1,200,895 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.86%) - 274,080,517 stalled-cycles-backend:u # 7.73% backend cycles idle (74.86%) - 9,561,199,112 instructions:u # 2.70 insn per cycle - # 0.03 stalled cycles per insn (74.86%) - 1.023044859 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83752) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.524300e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.524716e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.524716e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.500483 sec +INFO: No Floating Point Exceptions have been reported + 4,288,401,155 cycles # 2.852 GHz + 9,585,756,274 instructions # 2.24 insn per cycle + 1.504684164 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 -Avg ME (F77/C++) = 9.8722595285459444E-003 -Relative difference = 3.5163711246052657e-07 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.003171e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.003722e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.003722e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.320958 sec +INFO: No Floating Point Exceptions have been reported + 3,744,622,204 cycles # 2.828 GHz + 8,508,595,657 instructions # 2.27 insn per cycle + 1.325042842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 240) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.615962e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.616495e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.616495e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 1.462041 sec +INFO: 
No Floating Point Exceptions have been reported + 2,701,843,389 cycles # 1.843 GHz + 4,281,298,665 instructions # 1.58 insn per cycle + 1.466469773 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2693) (512y: 184) (512z:79098) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722595285411531E-003 +Relative difference = 3.516375977906115e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 08f0618e5c..c5830d5029 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,13 +1,13 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:30:57 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:35:58 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.207882e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.208719e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.208944e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.755815 sec +INFO: No Floating 
Point Exceptions have been reported + 6,030,784,063 cycles # 2.986 GHz + 12,690,536,183 instructions # 2.10 insn per cycle + 2.076295584 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.154878e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.155502e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.155595e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.055928 sec +INFO: No Floating Point Exceptions have been reported + 6,993,860,684 cycles # 3.012 GHz + 14,389,037,711 instructions # 2.06 insn per cycle + 2.378610677 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.079901e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.079923e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.079923e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.889350 sec -INFO: No Floating Point Exceptions have been reported - 17,110,757,262 cycles:u # 3.498 GHz (74.98%) - 101,242,552 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.98%) - 1,834,928,824 stalled-cycles-backend:u # 10.72% backend cycles idle (74.98%) - 54,147,547,381 instructions:u # 3.16 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.896725658 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.791338e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.791603e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.791603e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.007057 sec +INFO: No Floating Point Exceptions have been reported + 18,246,753,562 cycles # 3.036 GHz + 53,910,639,040 instructions # 2.95 insn per cycle + 6.011238409 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.719996e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.720382e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.720382e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.119334 sec -INFO: No Floating Point Exceptions have been reported - 3,914,853,183 cycles:u # 3.489 GHz (75.05%) - 50,519,645 stalled-cycles-frontend:u # 1.29% frontend cycles idle (75.05%) - 382,127,369 stalled-cycles-backend:u # 9.76% backend cycles idle (75.05%) - 13,751,093,710 instructions:u # 3.51 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 1.126666107 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.482340e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482762e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482762e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.518087 sec +INFO: No Floating Point Exceptions have been reported + 4,616,306,696 cycles # 3.034 GHz + 13,807,478,566 instructions # 2.99 insn per cycle + 1.522256201 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 
4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.014472e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.014627e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.014627e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.522139 sec -INFO: No Floating Point Exceptions have been reported - 1,831,942,479 cycles:u # 3.489 GHz (74.58%) - 15,822,916 stalled-cycles-frontend:u # 0.86% frontend cycles idle (74.15%) - 162,914,012 stalled-cycles-backend:u # 8.89% backend cycles idle (74.22%) - 4,832,252,888 instructions:u # 2.64 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 0.529423018 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.020421e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022190e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022190e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.754330 sec +INFO: No Floating Point Exceptions have been reported + 2,137,577,296 cycles # 2.820 GHz + 4,836,841,238 instructions # 2.26 insn per cycle + 0.758604558 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.912780e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.914883e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.914883e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.669071 sec +INFO: No Floating Point Exceptions have been reported + 1,900,823,035 cycles # 2.826 GHz + 4,291,171,823 instructions # 2.26 insn per cycle + 0.673206807 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.288558e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.290700e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.290700e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.727738 sec +INFO: 
No Floating Point Exceptions have been reported + 1,355,809,114 cycles # 1.853 GHz + 2,162,656,295 instructions # 1.60 insn per cycle + 0.732221235 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt index 5f9dc096d3..725d6753a9 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt +++ 
b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,35 +19,96 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_11:16:56 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_23:05:24 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.294446e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.299887e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.299887e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6 +TOTAL : 1.676123 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 5,803,384,426 cycles # 2.997 GHz + 12,435,271,508 instructions # 2.14 insn per cycle + 1.992620080 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.134524e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.145734e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.145734e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6 +TOTAL : 2.020497 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 6,855,684,842 cycles # 3.005 GHz + 14,918,783,289 instructions # 2.18 insn per cycle + 2.337019864 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260159E-003 +Relative difference = 0.0021940095370046923 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.072401e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.072422e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.072422e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.923168 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 17,240,159,807 cycles:u # 3.500 GHz (74.96%) - 101,183,646 stalled-cycles-frontend:u # 0.59% frontend cycles idle (74.99%) - 1,888,003,325 stalled-cycles-backend:u # 10.95% backend cycles idle (74.99%) - 54,161,007,670 instructions:u # 3.14 insn per cycle - # 0.03 stalled cycles per insn (74.99%) - 4.930553656 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33073) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.807568e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.807845e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.807845e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 5.997723 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 18,158,608,631 cycles # 3.026 GHz + 53,912,576,507 instructions # 2.97 insn per cycle + 6.001895502 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -55,36 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614203575E-003 -Relative difference = 3.4355542366580335e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087551509E-003 +Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.902451e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.902893e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.902893e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.079324 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,786,149,577 cycles:u # 3.498 GHz (74.73%) - 774,965 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.87%) - 367,116,707 stalled-cycles-backend:u # 9.70% backend cycles idle (74.87%) - 13,749,519,327 instructions:u # 3.63 insn per cycle - # 0.03 stalled cycles per insn (74.87%) - 1.086555722 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95933) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.488685e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489192e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489192e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.515485 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,602,710,939 cycles # 3.030 GHz + 13,809,381,685 instructions # 3.00 insn per cycle + 1.519902029 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -92,36 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896367235E-003 +Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.042807e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.042969e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.042969e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.509031 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,781,439,195 cycles:u # 3.480 GHz (75.03%) - 229,611 stalled-cycles-frontend:u # 0.01% frontend cycles idle (75.01%) - 143,418,602 stalled-cycles-backend:u # 8.05% backend cycles idle (75.01%) - 4,815,198,676 instructions:u # 2.70 insn per cycle - # 0.03 stalled cycles per insn (75.01%) - 0.516024912 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84347) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.102201e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.103949e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.103949e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.745530 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 2,129,106,437 cycles # 2.842 GHz + 4,838,834,024 instructions # 2.27 insn per cycle + 0.749838678 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -129,16 +184,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 
9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.914657e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.916750e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.916750e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.669155 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,905,705,276 cycles # 2.833 GHz + 4,293,242,906 instructions # 2.25 insn per cycle + 0.673440078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81183) (512y: 45) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091246E-003 +Relative difference = 1.8588029579156084e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=256) +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.205807e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.208130e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.208130e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.736316 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,359,100,452 cycles # 1.836 GHz + 2,164,753,539 instructions # 1.59 insn per cycle + 0.740818713 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3481) (512y: 45) (512z:79330) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982676284E-003 +Relative difference = 2.004124217057488e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 03c4dcf765..d9277e9262 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:31:29 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:36:59 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 6.201907e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.202602e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.202848e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 +TOTAL : 1.759755 sec +INFO: No Floating Point Exceptions have been reported + 6,041,131,533 cycles # 2.987 GHz + 12,887,925,845 instructions # 2.13 insn per cycle + 2.079278840 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 2.142501e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.143086e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.143184e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 +TOTAL : 2.062982 sec +INFO: No Floating Point Exceptions have been reported + 7,025,736,377 cycles # 3.016 GHz + 14,376,566,106 instructions # 2.05 insn per cycle + 2.386284867 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.849635e-03 +Avg ME (F77/GPU) = 9.8712451931260107E-003 +Relative difference = 0.0021940095370041636 +OK (relative difference <= 5E-3) +========================================================================= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) 
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.080560e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.080582e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.080582e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927928e-03 +- 4.922377e-03 ) GeV^-6 -TOTAL : 4.886139 sec -INFO: No Floating Point Exceptions have been reported - 17,112,350,866 cycles:u # 3.500 GHz (74.97%) - 102,359,219 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.97%) - 1,775,063,311 stalled-cycles-backend:u # 10.37% backend cycles idle (74.97%) - 54,141,024,086 instructions:u # 3.16 insn per cycle - # 0.03 stalled cycles per insn (74.98%) - 4.894054989 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:33154) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.806311e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.806570e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.806570e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 +TOTAL : 6.000091 sec +INFO: No Floating Point Exceptions have been reported + 18,259,581,889 cycles # 3.042 GHz + 53,898,592,963 instructions # 2.95 insn per cycle + 6.004360411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855168e-03 -Avg ME (F77/C++) = 9.8551676614199186E-003 -Relative difference = 3.435558690007174e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847961e-03 +Avg ME (F77/C++) = 9.8479612087572898E-003 +Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.921510e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.921954e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.921954e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.927926e-03 +- 4.922375e-03 ) GeV^-6 -TOTAL : 1.074522 sec -INFO: No Floating Point Exceptions have been reported - 3,759,358,418 cycles:u # 3.489 GHz (74.76%) - 649,246 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.76%) - 360,850,463 stalled-cycles-backend:u # 9.60% backend cycles idle (74.69%) - 13,770,148,457 instructions:u # 3.66 insn per cycle - # 0.03 stalled cycles per insn (75.06%) - 1.082360075 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95973) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.506868e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.507352e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.507352e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 +TOTAL : 1.507769 sec +INFO: No Floating Point Exceptions have been reported + 4,592,889,606 cycles # 3.040 GHz + 13,800,588,544 instructions # 3.00 insn per cycle + 1.511992304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.855164e-03 -Avg ME (F77/C++) = 9.8551639361110794E-003 -Relative difference = 6.48278610035626e-09 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.847955e-03 +Avg ME (F77/C++) = 9.8479546896065809E-003 +Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.040829e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.040996e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.040996e+03 ) sec^-1 -MeanMatrixElemValue = ( 4.918583e-03 +- 4.913042e-03 ) GeV^-6 -TOTAL : 0.509322 sec -INFO: No Floating Point Exceptions have been reported - 1,787,072,903 cycles:u # 3.486 GHz (75.12%) - 431,670 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.04%) - 140,499,187 stalled-cycles-backend:u # 7.86% backend cycles idle (75.04%) - 4,812,515,332 instructions:u # 2.69 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 0.516905560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84309) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.927112e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.928805e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.928805e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.764116 sec +INFO: No Floating Point Exceptions have been reported + 2,152,921,246 cycles # 2.805 GHz + 4,840,961,497 instructions # 2.25 insn per cycle + 0.768293313 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 
tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 9.836478e-03 -Avg ME (F77/C++) = 9.8364784946823516E-003 -Relative difference = 5.0290597139820844e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.901326e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.903485e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.903485e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 +TOTAL : 0.669713 sec +INFO: No Floating Point Exceptions have been reported + 1,899,776,233 cycles # 2.822 GHz + 4,295,171,210 instructions # 2.26 insn per cycle + 0.673880897 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 25) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892973e-03 +Avg ME (F77/C++) = 9.8929728161091923E-003 +Relative difference = 1.85880227405429e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 7.249891e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.252145e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.252145e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 +TOTAL : 0.730611 sec +INFO: No 
Floating Point Exceptions have been reported + 1,361,058,670 cycles # 1.854 GHz + 2,169,526,438 instructions # 1.59 insn per cycle + 0.734943392 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.892981e-03 +Avg ME (F77/C++) = 9.8929811982957326E-003 +Relative difference = 2.0044082998332894e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 116046dfb8..9d0b73e163 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,13 +1,13 @@ 
-Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:32:00 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:38:01 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.666751e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.667250e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.667415e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.193907 sec +INFO: No Floating Point Exceptions have been reported + 7,630,208,470 cycles # 3.025 GHz + 15,813,975,042 instructions # 2.07 insn per cycle + 2.578598510 
seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.108221e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108518e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108553e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.432158 sec +INFO: No Floating Point Exceptions have been reported + 11,402,912,009 cycles # 3.032 GHz + 24,689,535,297 instructions # 2.17 insn per cycle + 3.818442336 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.203416e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.203454e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.203454e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.390036 sec -INFO: No Floating Point Exceptions have been reported - 15,382,779,589 cycles:u # 3.502 GHz (74.94%) - 2,323,654 stalled-cycles-frontend:u # 0.02% frontend cycles idle (75.03%) - 1,701,905,344 stalled-cycles-backend:u # 11.06% backend cycles idle (75.05%) - 53,720,490,538 instructions:u # 3.49 insn per cycle - # 0.03 stalled cycles per insn (75.05%) - 4.396995633 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44590) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.867089e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.867297e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.867297e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.713479 sec +INFO: No Floating Point Exceptions have been reported + 19,196,861,628 cycles # 2.858 GHz + 54,133,636,915 instructions # 2.82 insn per cycle + 6.717705413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.492350e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.492497e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.492497e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.120384 sec -INFO: No Floating Point Exceptions have been reported - 7,427,822,621 cycles:u # 3.498 GHz (74.84%) - 2,036,263 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.03%) - 811,533,037 stalled-cycles-backend:u # 10.93% backend cycles idle (75.13%) - 25,862,271,774 instructions:u # 3.48 insn per cycle - # 0.03 stalled cycles per insn (75.13%) - 2.144395965 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95377) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.575052e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.575140e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.575140e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.353105 sec +INFO: No Floating Point Exceptions have been reported + 9,514,230,425 cycles # 2.835 GHz + 26,187,858,352 instructions # 2.75 insn per cycle + 3.357249981 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.284060e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.284564e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.284564e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 1.000498 sec -INFO: No Floating Point Exceptions have been reported - 3,492,343,263 cycles:u # 3.481 GHz (74.68%) - 49,955,347 stalled-cycles-frontend:u # 1.43% frontend cycles idle (75.08%) - 306,069,910 stalled-cycles-backend:u # 8.76% backend cycles idle (75.29%) - 9,109,427,934 instructions:u # 2.61 insn per cycle - # 0.03 stalled cycles per insn (75.29%) - 1.007321016 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82824) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.700128e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.700595e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.700595e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.429975 sec +INFO: No Floating Point Exceptions have been reported + 4,074,429,263 cycles # 2.842 GHz + 9,249,195,343 instructions # 2.27 insn per cycle + 1.434239548 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.266422e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.267083e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.267083e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.240358 sec +INFO: No Floating Point Exceptions have been reported + 3,512,291,376 cycles # 2.824 GHz + 8,183,196,831 instructions # 2.33 insn per cycle + 1.244579165 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 80) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.600907e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.601474e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.601474e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.469084 sec +INFO: No Floating Point Exceptions have been reported + 2,662,106,284 cycles # 1.808 GHz + 4,173,178,161 instructions # 1.57 insn per cycle + 1.473471448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2615) (512y: 92) (512z:78910) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt index 5982c7fe15..559bd31d07 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt @@ -1,13 +1,13 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR 
== 1) make: Nothing to be done for 'gtestlibs'. make: Nothing to be done for 'all'. @@ -19,33 +19,80 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-10-04_10:32:37 +make: Nothing to be done for 'all'. + +DATE: 2024-10-02_22:39:25 -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 4.671708e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.672224e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.672401e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 2.196836 sec +INFO: No Floating Point Exceptions have been reported + 7,586,412,190 cycles # 3.005 GHz + 16,831,088,475 instructions # 2.22 insn per cycle + 2.584515718 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% +......................................................................... 
+runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +EvtsPerSec[Rmb+ME] (23) = ( 1.106090e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.106386e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106418e+04 ) sec^-1 +MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 +TOTAL : 3.438799 sec +INFO: No Floating Point Exceptions have been reported + 11,376,125,932 cycles # 3.016 GHz + 26,554,562,579 instructions # 2.33 insn per cycle + 3.828018149 seconds time elapsed +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 9.872263e-03 +Avg ME (F77/GPU) = 9.8722599015656498E-003 +Relative difference = 3.1385249252060663e-07 +OK (relative difference <= 5E-3) ========================================================================= -Not found: /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] 
[inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.175474e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.175513e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.175513e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 4.492696 sec -INFO: No Floating Point Exceptions have been reported - 15,677,051,375 cycles:u # 3.488 GHz (74.88%) - 7,593,419 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.91%) - 1,678,941,636 stalled-cycles-backend:u # 10.71% backend cycles idle (74.95%) - 53,738,210,249 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (75.04%) - 4.499980758 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:44515) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 7.838588e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.838795e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.838795e+01 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 +TOTAL : 6.729469 sec +INFO: No Floating Point Exceptions have been reported + 19,118,150,644 cycles # 2.840 GHz + 54,162,338,740 instructions # 2.83 insn per cycle + 6.733611093 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -53,34 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.497111e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.497256e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.497256e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 2.116118 sec -INFO: No Floating Point Exceptions have been reported - 7,419,969,367 cycles:u # 3.502 GHz (74.75%) - 1,956,530 stalled-cycles-frontend:u # 0.03% frontend cycles idle (74.94%) - 790,000,842 stalled-cycles-backend:u # 10.65% backend cycles idle (75.08%) - 25,753,798,107 instructions:u # 3.47 insn per cycle - # 0.03 stalled cycles per insn (75.08%) - 2.137334693 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4:95039) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.612496e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.612591e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.612591e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 3.276928 sec +INFO: No Floating Point Exceptions have been reported + 9,293,469,250 cycles # 2.833 GHz + 26,089,245,195 instructions # 2.81 insn per cycle + 3.281183397 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:95935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -88,34 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary 
= CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.582380e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.582941e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.582941e+02 ) sec^-1 -MeanMatrixElemValue = ( 4.936475e-03 +- 4.930917e-03 ) GeV^-6 -TOTAL : 0.947983 sec -INFO: No Floating Point Exceptions have been reported - 3,318,902,094 cycles:u # 3.490 GHz (74.78%) - 491,341 stalled-cycles-frontend:u # 0.01% frontend cycles idle (74.76%) - 258,519,019 stalled-cycles-backend:u # 7.79% backend cycles idle (74.76%) - 9,040,296,434 instructions:u # 2.72 insn per cycle - # 0.03 stalled cycles per insn (74.78%) - 0.955766528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:82125) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.692288e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.692744e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.692744e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.434426 sec +INFO: No Floating Point Exceptions have been reported + 4,061,133,652 cycles # 2.824 GHz + 9,213,647,458 instructions # 2.27 insn per cycle + 1.438661249 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -123,16 +164,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594324461913E-003 Relative difference = 3.613714310412983e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.284969e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.285585e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.285585e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.235575 sec +INFO: No Floating Point Exceptions have been reported + 3,509,658,458 cycles # 2.833 GHz + 8,168,658,311 instructions # 2.33 insn per cycle + 1.239748090 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 230) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.726305e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.726893e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.726893e+02 ) sec^-1 +MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 +TOTAL : 1.419482 sec +INFO: No Floating Point Exceptions have been reported + 2,625,028,267 cycles # 1.845 GHz + 4,167,468,567 instructions # 1.59 insn per cycle + 1.423823222 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1879) (512y: 174) (512z:78884) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 9.872263e-03 +Avg ME (F77/C++) = 9.8722594324461913E-003 +Relative difference = 3.613714310412983e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index f66367ad66..37f0f4c146 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing 
to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:31:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.080649e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.567361e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.576990e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 -TOTAL : 0.364406 sec -INFO: No Floating Point Exceptions have been reported - 962,974,955 cycles:u # 2.641 GHz (75.14%) - 2,496,617 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.03%) - 5,095,285 stalled-cycles-backend:u # 0.53% 
backend cycles idle (76.14%) - 1,449,498,115 instructions:u # 1.51 insn per cycle - # 0.00 stalled cycles per insn (77.08%) - 0.416440309 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.834826e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.929186e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.043914e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.458579 sec +INFO: No Floating Point Exceptions have been reported + 1,990,123,139 cycles # 2.953 GHz + 2,784,480,859 instructions # 1.40 insn per cycle + 0.733197576 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.957014e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.678838e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.694069e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 -TOTAL : 0.489555 sec 
-INFO: No Floating Point Exceptions have been reported - 1,280,261,518 cycles:u # 2.506 GHz (76.89%) - 2,414,688 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.74%) - 7,024,109 stalled-cycles-backend:u # 0.55% backend cycles idle (74.98%) - 1,750,089,062 instructions:u # 1.37 insn per cycle - # 0.00 stalled cycles per insn (74.51%) - 0.548004651 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.981412e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.496464e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.730696e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.543487 sec +INFO: No Floating Point Exceptions have been reported + 2,322,895,437 cycles # 2.968 GHz + 3,227,685,027 instructions # 1.39 insn per cycle + 0.842253747 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490469 -Relative difference = 5.286902836925003e-07 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.449138e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.478107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.478107e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.154326 sec -INFO: No Floating Point Exceptions have been reported - 4,027,250,976 cycles:u # 3.480 GHz (75.12%) - 2,661,759 stalled-cycles-frontend:u # 0.07% frontend cycles idle (75.12%) - 809,783,805 stalled-cycles-backend:u # 20.11% backend cycles idle (75.12%) - 13,130,611,823 instructions:u # 3.26 insn per cycle - # 0.06 stalled cycles per insn (75.12%) - 1.161554843 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.098188e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.121629e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.121629e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.511028 sec +INFO: No Floating Point Exceptions have been reported + 4,619,987,849 cycles # 3.050 GHz + 13,190,822,149 instructions # 2.86 insn per cycle + 1.515227589 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.509870e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.596568e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.596568e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.676181 sec -INFO: No Floating Point Exceptions have been reported - 2,361,369,338 cycles:u # 3.477 GHz (75.00%) - 2,082,729 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.27%) - 645,346,736 stalled-cycles-backend:u # 27.33% backend cycles idle (75.27%) - 7,468,617,395 instructions:u # 3.16 insn per cycle - # 0.09 stalled cycles per insn (75.27%) - 0.683368779 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3010) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.922055e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.994654e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.994654e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.871134 sec +INFO: No Floating Point Exceptions have been reported + 2,634,578,151 cycles # 3.012 GHz + 7,554,878,218 instructions # 2.87 insn per cycle + 0.875291158 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.772164e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.100284e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.100284e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.366910 sec -INFO: No Floating Point Exceptions have been reported - 1,284,395,524 cycles:u # 3.472 GHz (74.43%) - 1,919,279 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.06%) - 224,523,956 stalled-cycles-backend:u # 17.48% backend cycles idle (74.06%) - 3,088,983,186 instructions:u # 2.41 insn per cycle - # 0.07 stalled cycles per insn (74.40%) - 0.374787504 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.211416e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.420508e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.420508e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.529658 sec +INFO: No Floating Point Exceptions have been reported + 1,488,293,928 cycles # 2.791 GHz + 3,159,946,212 instructions # 2.12 insn per cycle + 0.533835521 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.512087e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 3.763823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.763823e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.486021 sec +INFO: No Floating Point Exceptions have been reported + 1,346,900,449 cycles # 2.750 GHz + 3,013,892,972 instructions # 2.24 insn per cycle + 0.490326977 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.472318e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.592196e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.592196e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.683635 sec +INFO: No Floating Point Exceptions have been reported + 1,324,488,225 cycles # 1.928 GHz + 1,962,344,375 instructions # 1.48 insn per cycle + 0.687834799 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt index c1bb71aaa3..edac9efaa0 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_11:15:24 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:02:33 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.208003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.457307e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.457307e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.511863 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,531,477,497 cycles:u # 2.906 GHz (74.22%) - 6,663,983 stalled-cycles-frontend:u # 0.44% frontend cycles idle (74.53%) - 271,116,364 stalled-cycles-backend:u # 17.70% backend cycles idle (74.50%) - 1,914,127,148 instructions:u # 1.25 insn per cycle - # 0.14 stalled cycles per insn (74.91%) - 0.561318051 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.357617e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.567301e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.567301e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.480710 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,054,637,495 cycles # 2.959 GHz + 3,064,097,821 instructions # 1.49 insn per cycle + 0.751345984 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.016150e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.168560e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.168560e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.217284e+03 +- 8.156969e+02 ) GeV^-2 -TOTAL : 1.118560 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,337,834,991 cycles:u # 2.897 GHz (74.89%) - 16,778,113 stalled-cycles-frontend:u # 0.50% frontend cycles idle (74.69%) - 838,698,020 stalled-cycles-backend:u # 25.13% backend cycles idle (75.00%) - 3,491,444,280 instructions:u # 1.05 insn per cycle - # 0.24 stalled cycles per insn (75.00%) - 1.192167863 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 3.284276e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.260264e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.260264e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.756366 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,992,488,404 cycles # 2.973 GHz + 4,533,320,753 instructions # 1.51 insn per cycle + 1.065306552 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 0.14247482467490469 -Relative difference = 5.286902836925003e-07 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.409731e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.436914e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.436914e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.190081 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 4,165,664,768 cycles:u # 3.490 GHz (74.60%) - 1,999,089 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.91%) - 936,146,357 stalled-cycles-backend:u # 22.47% backend cycles idle (75.19%) - 13,139,188,653 instructions:u # 3.15 insn per cycle - # 0.07 stalled cycles per insn (75.20%) - 1.197508250 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.096875e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.120294e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120294e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.518699 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,650,030,796 cycles # 3.055 GHz + 13,198,473,845 instructions # 2.84 insn per cycle + 1.523176274 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.509767e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.596497e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.596497e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.680631 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 2,380,899,151 cycles:u # 3.481 GHz (74.36%) - 2,045,113 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.37%) - 645,751,963 stalled-cycles-backend:u # 27.12% backend cycles idle (74.92%) - 7,502,034,938 instructions:u # 3.15 insn per cycle - # 0.09 stalled cycles per insn (75.41%) - 0.687987997 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3010) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.939375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.011645e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011645e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.870214 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,669,019,724 cycles # 3.054 GHz + 7,604,492,901 instructions # 2.85 insn per cycle + 0.874664100 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will 
cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.752340e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.077854e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.077854e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.372240 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,294,679,735 cycles:u # 3.448 GHz (74.46%) - 2,052,234 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.45%) - 224,523,524 stalled-cycles-backend:u # 17.34% backend cycles idle (74.45%) - 3,103,979,727 instructions:u # 2.40 insn per cycle - # 0.07 stalled cycles per insn (74.56%) - 0.379789000 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2888) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.240225e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.449199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449199e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.531313 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,520,382,878 cycles # 2.841 GHz + 3,208,340,410 instructions # 2.11 insn per cycle + 0.535666139 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.608215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.869332e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.869332e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.480406 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,381,392,428 cycles # 2.852 GHz + 3,064,436,632 instructions # 2.22 insn per cycle + 0.484872552 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2749) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.420993e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.538745e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.538745e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.705713 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,370,817,527 cycles # 1.932 GHz + 2,002,052,233 instructions # 1.46 insn per cycle + 0.710306404 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt index 862764ef6e..f87fba715e 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:12 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:32:01 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.150743e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.704934e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.715045e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 -TOTAL : 0.351090 sec -INFO: No Floating Point Exceptions have been reported - 927,250,962 cycles:u # 2.542 GHz (74.61%) - 2,564,965 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.90%) - 4,834,497 stalled-cycles-backend:u # 0.52% backend cycles idle (72.46%) - 1,462,832,615 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (75.46%) - 0.406691727 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.806684e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.878937e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.003620e+07 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.457908 sec +INFO: No Floating Point Exceptions have been reported + 1,992,366,483 cycles # 2.953 GHz + 2,806,396,880 instructions # 1.41 insn per cycle + 0.732986277 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.160706e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.014223e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.031032e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 -TOTAL : 0.503853 sec -INFO: No Floating Point Exceptions have been reported - 1,271,560,375 cycles:u # 2.529 GHz (74.93%) - 2,393,764 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.82%) - 9,992,356 stalled-cycles-backend:u # 0.79% backend cycles idle (75.80%) - 1,777,974,435 instructions:u # 1.40 insn per cycle - # 0.01 stalled cycles per insn (76.74%) 
- 0.563738720 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.961222e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.420833e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.640275e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.537970 sec +INFO: No Floating Point Exceptions have been reported + 2,313,496,127 cycles # 2.973 GHz + 3,286,265,008 instructions # 1.42 insn per cycle + 0.835500868 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 -Avg ME (F77/GPU) = 
0.14247482467490469 -Relative difference = 5.286902836925003e-07 +Avg ME (F77/GPU) = 0.14247482467490466 +Relative difference = 5.286902838873106e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.438309e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.466502e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.466502e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.162705 sec -INFO: No Floating Point Exceptions have been reported - 4,063,566,371 cycles:u # 3.486 GHz (74.61%) - 2,470,567 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.80%) - 751,456,331 stalled-cycles-backend:u # 18.49% backend cycles idle (75.15%) - 13,131,258,870 instructions:u # 3.23 insn per cycle - # 0.06 stalled cycles per insn (75.30%) - 1.170023945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 720) (avx2: 0) (512y: 0) (512z: 0) 
+EvtsPerSec[Rmb+ME] (23) = ( 1.095939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.118909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.118909e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.513926 sec +INFO: No Floating Point Exceptions have been reported + 4,617,878,876 cycles # 3.044 GHz + 13,179,768,298 instructions # 2.85 insn per cycle + 1.518148487 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) 
= 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499481 Relative difference = 5.286896511435107e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.469436e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.553907e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.553907e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.687246 sec -INFO: No Floating Point Exceptions have been reported - 2,401,871,924 cycles:u # 3.480 GHz (74.52%) - 1,948,566 stalled-cycles-frontend:u # 0.08% frontend cycles idle (74.51%) - 617,453,605 stalled-cycles-backend:u # 25.71% backend cycles idle (74.43%) - 7,491,115,990 instructions:u # 3.12 insn per cycle - # 0.08 stalled cycles per insn (75.01%) - 0.694299928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3003) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.958372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033582e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033582e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.854860 sec +INFO: 
No Floating Point Exceptions have been reported + 2,637,650,061 cycles # 3.073 GHz + 7,552,993,704 instructions # 2.86 insn per cycle + 0.859000708 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467499475 Relative difference = 5.286896515331313e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.735717e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.057899e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.057899e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.369508 sec -INFO: No Floating Point Exceptions have been reported - 1,286,022,326 cycles:u # 3.453 GHz (74.27%) - 1,818,073 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.24%) - 305,110,657 stalled-cycles-backend:u # 23.73% backend cycles idle (74.24%) - 3,083,688,111 instructions:u # 2.40 insn per cycle - # 0.10 stalled cycles per insn (74.60%) - 0.376414098 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2873) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.291817e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.503784e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.503784e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.516316 sec +INFO: No Floating Point Exceptions have been reported + 1,490,683,274 cycles # 2.867 GHz + 3,158,884,365 instructions # 2.12 insn per cycle + 0.520526770 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 2976) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482467492589 Relative difference = 5.286901348574438e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.689767e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.957818e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.957818e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.462577 sec +INFO: No Floating Point Exceptions have been reported + 1,342,018,810 cycles # 2.879 GHz + 3,010,796,760 instructions # 2.24 insn per cycle + 0.466768744 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.497346e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.619356e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.619356e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.676874 sec +INFO: No Floating Point Exceptions have been reported + 1,324,736,218 cycles # 1.948 GHz + 1,960,830,009 instructions # 
1.48 insn per cycle + 0.681118880 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482467492589 +Relative difference = 5.286901348574438e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index f61a80ed95..ea31adf683 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux 
+BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:18 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:32:15 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.377727e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.319503e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328467e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.205132e+03 +- 5.720277e+03 ) GeV^-2 -TOTAL : 0.320572 sec -INFO: No Floating Point 
Exceptions have been reported - 803,651,652 cycles:u # 2.440 GHz (75.78%) - 2,488,663 stalled-cycles-frontend:u # 0.31% frontend cycles idle (75.23%) - 5,317,157 stalled-cycles-backend:u # 0.66% backend cycles idle (75.05%) - 1,355,208,615 instructions:u # 1.69 insn per cycle - # 0.00 stalled cycles per insn (74.27%) - 0.372732518 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.702651e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.950700e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.099951e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.451239 sec +INFO: No Floating Point Exceptions have been reported + 1,977,484,525 cycles # 2.954 GHz + 2,783,351,249 instructions # 1.41 insn per cycle + 0.726735040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.816019e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.474846e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.485746e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.171486e+04 +- 7.161170e+04 ) GeV^-2 -TOTAL : 0.422699 sec -INFO: No Floating Point Exceptions have been reported - 1,029,982,976 cycles:u # 2.442 GHz (74.04%) - 2,458,628 stalled-cycles-frontend:u # 0.24% frontend cycles idle (75.67%) - 5,130,049 stalled-cycles-backend:u # 0.50% backend cycles idle (75.81%) - 1,524,387,376 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (75.35%) - 0.481909964 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.338269e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447507e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.811164e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.491472 sec +INFO: No Floating Point Exceptions have been reported + 2,126,978,214 cycles # 2.918 GHz + 2,967,166,452 instructions # 1.40 insn per cycle + 0.787773473 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.424322e-01 -Avg ME (F77/GPU) = 0.14247950478971561 -Relative difference = 0.0003321214564936614 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.650625e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.689429e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.689429e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.014202 sec -INFO: No Floating Point Exceptions have been reported - 3,536,784,161 cycles:u # 3.478 GHz (74.89%) - 1,844,458 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.83%) - 400,984,416 stalled-cycles-backend:u # 11.34% backend cycles idle (74.83%) - 12,888,814,241 instructions:u # 3.64 insn per cycle - # 0.03 stalled cycles per insn (74.83%) - 1.021297393 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.154245e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.180927e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.180927e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.437058 sec +INFO: No Floating Point Exceptions have been reported + 4,402,948,339 cycles # 3.057 GHz + 12,951,871,317 instructions # 2.94 insn per cycle + 1.441082878 seconds time elapsed 
+=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.250998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.520136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.520136e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.406891 sec -INFO: No Floating Point Exceptions have been reported - 1,423,239,046 cycles:u # 3.474 GHz (74.79%) - 1,718,996 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.63%) - 484,659,717 stalled-cycles-backend:u # 34.05% backend cycles idle (74.63%) - 4,303,460,822 instructions:u # 3.02 insn per cycle - # 0.11 stalled cycles per insn (74.63%) - 0.413887732 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.851169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.029409e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.029409e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.592102 sec +INFO: No Floating Point Exceptions have been reported + 1,729,947,177 cycles # 2.905 GHz + 4,542,920,425 instructions # 2.63 insn per cycle + 0.596239608 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3627) 
(avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 
64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.931441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.913616e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.913616e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.228275 sec -INFO: No Floating Point Exceptions have been reported - 794,261,276 cycles:u # 3.436 GHz (73.52%) - 1,831,772 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.20%) - 225,154,218 stalled-cycles-backend:u # 28.35% backend cycles idle (75.79%) - 1,861,340,575 instructions:u # 2.34 insn per cycle - # 0.12 stalled cycles per insn (75.79%) - 0.235258451 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.840593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.576208e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.576208e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.297766 sec +INFO: No Floating Point Exceptions have been reported + 857,398,073 cycles # 2.846 GHz + 1,917,934,137 instructions # 2.24 insn per cycle + 0.301767368 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.022252e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.815506e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.815506e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.289819 sec +INFO: No Floating Point Exceptions have been reported + 805,893,210 cycles # 2.747 GHz + 1,834,128,170 instructions # 2.28 insn per cycle + 0.293996379 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.730274e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.196749e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.196749e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.366135 sec +INFO: No Floating Point Exceptions have been reported + 730,443,209 cycles # 1.976 GHz + 1,308,748,067 instructions # 
1.79 insn per cycle + 0.370229298 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 8a463e21a7..171a938e2f 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,77 +1,97 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_11:15:31 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:02:46 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.020725e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.186535e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.186535e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.955602e+02 +- 1.188241e+02 ) GeV^-2 -TOTAL : 0.478907 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,420,125,800 cycles:u # 2.887 GHz (75.11%) - 11,211,403 stalled-cycles-frontend:u # 0.79% frontend cycles idle (75.10%) - 262,078,503 stalled-cycles-backend:u # 18.45% backend cycles idle (74.86%) - 1,896,380,021 instructions:u # 1.34 insn per cycle - # 0.14 stalled cycles per insn (73.88%) - 0.531984168 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.066919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.361842e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.361842e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 +TOTAL : 0.460364 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,990,366,635 cycles # 2.956 GHz + 2,905,841,235 instructions # 1.46 insn per cycle + 0.730162203 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge +WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost +WARNING! 
Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost -WARNING! RamboHost selected: cannot use HiprandDevice, will use CommonRandom (as HiprandHost is not implemented yet) +WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:COMMON+RMBHST+BRDDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.747782e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.141361e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.141361e+07 ) sec^-1 -MeanMatrixElemValue = ( 1.184227e+03 +- 7.941570e+02 ) GeV^-2 -TOTAL : 1.009049 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,053,459,163 cycles:u # 2.959 GHz (75.31%) - 29,478,807 stalled-cycles-frontend:u # 0.97% frontend cycles idle (75.66%) - 840,754,925 stalled-cycles-backend:u # 27.53% backend cycles idle (74.67%) - 3,346,412,800 instructions:u # 1.10 insn per cycle - # 0.25 stalled cycles per insn (74.67%) - 1.071619763 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.138480e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.921745e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.921745e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 +TOTAL : 0.626871 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 2,570,592,828 cycles # 2.938 GHz + 3,830,625,555 instructions # 1.49 insn per cycle + 0.931187767 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -79,36 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.424322e-01 -Avg ME (F77/GPU) = 0.14247950478971561 -Relative difference = 0.0003321214564936614 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.652830e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.691788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.691788e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.014850 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 3,544,674,257 cycles:u # 3.482 GHz (74.87%) - 1,715,233 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.86%) - 399,646,581 stalled-cycles-backend:u # 11.27% backend cycles idle (74.86%) - 12,880,885,169 instructions:u # 3.63 insn per cycle - # 0.03 stalled cycles per insn (74.86%) - 1.021880296 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 727) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.145066e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.171268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.171268e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.451272 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 4,420,042,371 cycles # 3.039 GHz + 12,957,560,789 instructions # 2.93 insn per cycle + 1.455401506 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -116,36 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.122119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.374377e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.374377e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.421621 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 1,455,852,865 cycles:u # 3.429 GHz (75.45%) - 1,801,388 stalled-cycles-frontend:u # 0.12% frontend cycles idle (75.52%) - 518,485,359 stalled-cycles-backend:u # 35.61% backend cycles idle (75.52%) - 4,311,204,773 instructions:u # 2.96 insn per cycle - # 0.12 stalled cycles per insn (75.52%) - 0.429595072 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3392) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.984297e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.170633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.170633e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.570146 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 1,748,150,599 cycles # 3.047 GHz + 4,590,399,718 instructions # 2.63 insn per cycle + 0.574229373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= 
(~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -153,36 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+BRDHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.898517e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.853816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.853816e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.231864 sec -INFO: No Floating Point Exceptions have been reported -INFO: No Floating Point Exceptions have been reported - 810,604,946 cycles:u # 3.450 GHz (72.09%) - 1,903,581 stalled-cycles-frontend:u # 0.23% frontend cycles idle (73.94%) - 222,245,004 stalled-cycles-backend:u # 27.42% backend cycles idle (75.61%) - 1,888,821,187 instructions:u # 2.33 insn per cycle - # 0.12 stalled cycles per insn (76.18%) - 0.238946046 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3488) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.872273e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.592788e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.592788e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.300259 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been 
reported + 875,448,713 cycles # 2.882 GHz + 1,954,867,221 instructions # 2.23 insn per cycle + 0.304452268 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -190,16 +203,80 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= 
-/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.281096e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.128992e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.128992e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.282309 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 821,270,186 cycles # 2.872 GHz + 1,871,027,279 instructions # 2.28 insn per cycle + 0.286525778 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3400) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +WARNING! 
Instantiate host Bridge (nevt=16384) +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.718318e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.194314e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.194314e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.370922 sec +INFO: No Floating Point Exceptions have been reported +INFO: No Floating Point Exceptions have been reported + 748,872,143 cycles # 2.000 GHz + 1,350,116,546 instructions # 1.80 insn per cycle + 0.375129376 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1964) (512y: 24) (512z: 2435) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 5af0f6ea0a..2256daf6c3 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:24 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:32:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.082066e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.215210e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.223122e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.205132e+03 +- 5.720277e+03 ) GeV^-2 -TOTAL : 0.317799 sec -INFO: No Floating Point Exceptions have been reported - 844,589,664 cycles:u # 2.584 GHz (73.78%) - 2,509,111 stalled-cycles-frontend:u # 0.30% frontend cycles idle (74.31%) - 12,245,889 stalled-cycles-backend:u # 1.45% backend cycles idle (74.70%) - 1,354,529,485 instructions:u # 1.60 insn per cycle - # 0.01 stalled cycles per insn (74.50%) - 0.371487604 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.702298e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.990170e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.136648e+07 ) sec^-1 +MeanMatrixElemValue = ( 
2.018174e+01 +- 1.429492e+01 ) GeV^-2 +TOTAL : 0.449421 sec +INFO: No Floating Point Exceptions have been reported + 1,950,583,088 cycles # 2.925 GHz + 2,701,544,767 instructions # 1.38 insn per cycle + 0.724364608 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.705812e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.228243e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.237115e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.171486e+04 +- 7.161170e+04 ) GeV^-2 -TOTAL : 0.403616 sec -INFO: No Floating Point Exceptions have been reported - 1,063,510,607 cycles:u # 2.535 GHz (75.56%) - 2,307,789 stalled-cycles-frontend:u # 0.22% frontend cycles idle (75.82%) - 8,084,554 stalled-cycles-backend:u # 0.76% backend cycles idle (74.87%) - 1,664,889,093 instructions:u # 1.57 insn per cycle - # 0.00 stalled cycles per insn (72.80%) - 
0.462869624 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.344116e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.482358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.864758e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 +TOTAL : 0.487785 sec +INFO: No Floating Point Exceptions have been reported + 2,122,439,624 cycles # 2.960 GHz + 3,010,905,785 instructions # 1.42 insn per cycle + 0.774447089 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 1.424322e-01 -Avg ME (F77/GPU) = 0.14247950479185079 -Relative difference = 0.00033212147148451967 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 1.424226e-01 +Avg ME (F77/GPU) = 0.14247487904286338 +Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.637062e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.675221e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.675221e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945526e+02 +- 1.186197e+02 ) GeV^-2 -TOTAL : 1.022211 sec -INFO: No Floating Point Exceptions have been reported - 3,589,849,070 cycles:u # 3.502 GHz (74.85%) - 1,729,282 stalled-cycles-frontend:u # 0.05% frontend cycles idle (75.03%) - 525,569,379 stalled-cycles-backend:u # 14.64% backend cycles idle (75.03%) - 12,871,759,204 instructions:u # 3.59 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 
1.029456479 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 718) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.149657e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.175819e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.175819e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 1.442333 sec +INFO: No Floating Point Exceptions have been reported + 4,403,161,402 cycles # 3.046 GHz + 12,927,638,091 instructions # 2.94 insn per cycle + 1.446362002 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 -Avg ME (F77/C++) = 0.14246858320096933 -Relative difference = 1.1791391693704193e-07 +Avg ME (F77/C++) = 0.14246861273719524 +Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.193624e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.454281e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.454281e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.945528e+02 +- 1.186199e+02 ) GeV^-2 -TOTAL : 0.411966 sec -INFO: No Floating Point Exceptions have been reported - 1,442,483,013 cycles:u # 3.478 GHz (75.10%) - 1,744,540 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.94%) - 489,961,029 stalled-cycles-backend:u # 33.97% backend cycles idle (74.94%) - 4,296,198,664 instructions:u # 2.98 insn per cycle - # 0.11 stalled cycles per insn (74.94%) - 0.419091231 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3379) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.989413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.176290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.176290e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 +TOTAL : 0.564589 sec +INFO: No Floating Point Exceptions have been reported + 1,725,063,093 cycles # 3.036 GHz + 4,536,592,580 instructions # 2.63 insn per cycle + 0.568805063 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 1.424687e-01 -Avg ME (F77/C++) = 0.14246865423667998 -Relative difference = 3.2121666037785094e-07 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424686e-01 +Avg ME (F77/C++) = 0.14246862329122401 +Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow 
summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.968545e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.937029e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.937029e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.947131e+02 +- 1.186881e+02 ) GeV^-2 -TOTAL : 0.226723 sec -INFO: No Floating Point Exceptions have been reported - 778,463,869 cycles:u # 3.391 GHz (75.62%) - 1,805,704 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.62%) - 241,725,364 stalled-cycles-backend:u # 31.05% backend cycles idle (75.62%) - 1,852,884,590 instructions:u # 2.38 insn per cycle - # 0.13 stalled cycles per insn (75.62%) - 0.233668425 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3463) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.871312e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.604631e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.604631e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.296052 sec +INFO: No Floating Point Exceptions have been reported + 857,546,580 cycles # 2.863 GHz + 1,914,366,165 instructions # 2.23 insn per cycle + 0.300067432 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3549) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 -Avg ME (F77/C++) = 0.14247490118064832 -Relative difference = 8.286711056488833e-09 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': 
AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.287189e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.128303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.128303e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.277637 sec +INFO: No Floating Point Exceptions have been reported + 802,533,820 cycles # 2.856 GHz + 1,829,848,597 instructions # 2.28 insn per cycle + 0.281575570 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 22) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491543012991 +Relative difference = 1.0830068962165901e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.755061e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.233949e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.233949e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 +TOTAL : 0.364070 sec +INFO: No Floating Point Exceptions have been reported + 730,229,495 cycles # 1.987 GHz + 1,306,200,417 instructions # 1.79 insn per cycle + 0.368140152 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1928) (512y: 24) (512z: 2435) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247491576758442 +Relative difference = 1.1066920862943416e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 4e7a959012..d81706c8fb 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:30 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:32:39 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.550777e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.684021e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.686291e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 -TOTAL : 0.467527 sec -INFO: No Floating Point Exceptions have been reported - 1,253,452,808 cycles:u # 2.727 GHz (76.13%) - 2,883,239 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.27%) - 8,880,480 stalled-cycles-backend:u # 0.71% backend cycles idle (73.55%) - 1,677,319,380 instructions:u # 1.34 insn per cycle - # 0.01 stalled cycles per insn (73.26%) - 0.516224704 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.762491e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.836111e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.951794e+07 ) sec^-1 +MeanMatrixElemValue = ( 
2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.455722 sec +INFO: No Floating Point Exceptions have been reported + 1,975,760,031 cycles # 2.935 GHz + 2,772,242,722 instructions # 1.40 insn per cycle + 0.730835336 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.999859e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.721066e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.736492e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 -TOTAL : 0.486288 sec -INFO: No Floating Point Exceptions have been reported - 1,266,298,701 cycles:u # 2.496 GHz (75.97%) - 2,417,057 stalled-cycles-frontend:u # 0.19% frontend cycles idle (76.65%) - 7,438,043 stalled-cycles-backend:u # 0.59% backend cycles idle (75.86%) - 1,822,767,707 instructions:u # 1.44 insn per cycle - # 0.00 stalled cycles per insn (73.80%) - 
0.547651674 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.992470e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.540289e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.772038e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.539165 sec +INFO: No Floating Point Exceptions have been reported + 2,324,912,396 cycles # 2.969 GHz + 3,295,857,552 instructions # 1.42 insn per cycle + 0.840288561 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 
0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.463112e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.492510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.492510e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.143575 sec -INFO: No Floating Point Exceptions have been reported - 3,997,523,281 cycles:u # 3.486 GHz (74.99%) - 1,905,658 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.89%) - 516,672,849 stalled-cycles-backend:u # 12.92% backend cycles idle (74.89%) - 13,130,248,081 instructions:u # 3.28 insn per cycle - # 0.04 stalled cycles per insn (74.89%) - 1.151446300 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 706) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.097209e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 
1.120361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.120361e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.512296 sec +INFO: No Floating Point Exceptions have been reported + 4,639,671,723 cycles # 3.061 GHz + 13,178,453,080 instructions # 2.84 insn per cycle + 1.516607479 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 
5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.513522e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.600407e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.600407e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.675411 sec -INFO: No Floating Point Exceptions have been reported - 2,363,364,099 cycles:u # 3.484 GHz (74.84%) - 2,086,161 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.24%) - 572,220,288 stalled-cycles-backend:u # 24.21% backend cycles idle (75.24%) - 7,436,302,025 instructions:u # 3.15 insn per cycle - # 0.08 stalled cycles per insn (75.24%) - 0.682583845 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3104) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.927117e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.999096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.999096e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.868650 sec +INFO: No Floating Point Exceptions have been reported + 2,644,248,242 cycles # 
3.032 GHz + 7,473,014,363 instructions # 2.83 insn per cycle + 0.872842396 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3152) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.842606e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.180850e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.180850e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.361999 sec -INFO: No Floating Point Exceptions have been reported - 1,257,591,048 cycles:u # 3.446 GHz (73.72%) - 1,838,682 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.43%) - 367,694,710 stalled-cycles-backend:u # 29.24% backend cycles idle (75.52%) - 3,030,416,443 instructions:u # 2.41 insn per cycle - # 0.12 stalled cycles per insn (75.90%) - 0.369270773 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3024) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.309998e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.525678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.525678e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.513914 sec +INFO: No Floating Point Exceptions have been reported + 1,471,858,704 cycles # 2.848 GHz + 3,126,825,800 instructions # 2.12 insn per cycle + 0.518256433 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 3133) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.744395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.024619e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.024619e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.456745 sec +INFO: No Floating Point Exceptions have been reported + 1,318,209,963 cycles # 2.863 GHz + 2,981,428,844 instructions # 2.26 insn per cycle + 0.461015665 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.415670e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.528359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.528359e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.699082 sec +INFO: No Floating Point Exceptions have been reported + 1,360,436,298 cycles # 1.937 GHz + 1,989,825,380 instructions # 
1.46 insn per cycle + 0.703247363 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2251) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index bd70ad90bb..4385bdd6af 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux 
+BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-10-04_10:29:36 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_22:32:53 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.139644e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.654366e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.664699e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.219643e+03 +- 1.210703e+03 ) GeV^-2 -TOTAL : 0.350320 sec -INFO: No Floating Point 
Exceptions have been reported - 978,388,626 cycles:u # 2.688 GHz (74.41%) - 2,647,867 stalled-cycles-frontend:u # 0.27% frontend cycles idle (73.96%) - 6,328,070 stalled-cycles-backend:u # 0.65% backend cycles idle (74.38%) - 1,504,365,616 instructions:u # 1.54 insn per cycle - # 0.00 stalled cycles per insn (75.62%) - 0.403747700 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.778483e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.885440e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.000351e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.455432 sec +INFO: No Floating Point Exceptions have been reported + 1,987,161,261 cycles # 2.956 GHz + 2,799,045,356 instructions # 1.41 insn per cycle + 0.729366827 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.175064e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.942863e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.959336e+06 ) sec^-1 -MeanMatrixElemValue = ( 6.605124e+02 +- 5.694382e+02 ) GeV^-2 -TOTAL : 0.481624 sec -INFO: No Floating Point Exceptions have been reported - 1,269,901,645 cycles:u # 2.528 GHz (75.34%) - 2,461,659 stalled-cycles-frontend:u # 0.19% frontend cycles idle (75.34%) - 5,825,646 stalled-cycles-backend:u # 0.46% backend cycles idle (76.47%) - 1,815,054,012 instructions:u # 1.43 insn per cycle - # 0.00 stalled cycles per insn (75.78%) - 0.543801715 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.953178e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.419365e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.640921e+07 ) sec^-1 +MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 +TOTAL : 0.537410 sec +INFO: No Floating Point Exceptions have been reported + 2,307,597,745 cycles # 2.969 GHz + 3,283,930,647 instructions # 1.42 insn per cycle + 0.834536652 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.463537e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.492756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.492756e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 1.142870 sec -INFO: No Floating Point Exceptions have been reported - 3,995,713,367 cycles:u # 3.487 GHz (74.87%) - 1,908,462 stalled-cycles-frontend:u # 0.05% frontend cycles idle (74.87%) - 706,846,408 stalled-cycles-backend:u # 17.69% backend cycles idle (74.87%) - 13,129,808,915 instructions:u # 3.29 insn per cycle - # 0.05 stalled cycles per insn (74.87%) - 1.150382779 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 697) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.090474e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.113459e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.113459e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 1.521513 sec +INFO: No Floating Point Exceptions have been reported + 4,642,408,622 cycles # 3.044 GHz + 13,166,526,592 instructions # 2.84 insn per cycle + 1.525661892 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE 
program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.529128e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.617778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.617778e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.671420 sec -INFO: No Floating Point Exceptions have been reported - 2,340,956,935 cycles:u # 3.471 GHz (75.10%) - 1,957,506 stalled-cycles-frontend:u # 0.08% frontend cycles idle (75.10%) - 596,609,152 stalled-cycles-backend:u # 25.49% backend cycles idle (75.10%) - 7,452,557,298 instructions:u # 3.18 insn per cycle - # 0.08 stalled cycles per insn (75.10%) - 0.678666189 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.922918e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.995508e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.995508e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.870529 sec +INFO: No Floating Point Exceptions have been reported + 2,636,402,305 cycles # 3.016 GHz + 7,475,113,402 instructions # 2.84 insn per cycle + 0.874675780 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow 
summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.779457e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.111075e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.111075e+05 ) sec^-1 -MeanMatrixElemValue = ( 1.914935e+02 +- 1.163297e+02 ) GeV^-2 -TOTAL : 0.365983 sec -INFO: No Floating Point Exceptions have been reported - 1,278,426,865 cycles:u # 3.465 GHz (74.21%) - 1,921,745 stalled-cycles-frontend:u # 0.15% frontend cycles idle (74.00%) - 293,464,402 stalled-cycles-backend:u # 22.96% backend cycles idle (74.10%) - 3,049,353,775 instructions:u # 2.39 insn per cycle - # 0.10 stalled cycles per insn (74.94%) - 0.373312575 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3002) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.327635e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.552954e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.552954e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.510959 sec +INFO: No Floating Point Exceptions have been reported + 1,472,054,188 cycles # 2.861 GHz + 3,127,403,529 instructions # 2.12 insn per cycle + 0.515241692 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3111) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED 
] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.751588e+05 ) 
sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.026290e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.026290e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.455204 sec +INFO: No Floating Point Exceptions have been reported + 1,320,153,544 cycles # 2.877 GHz + 2,981,574,848 instructions # 2.26 insn per cycle + 0.459378563 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2871) (512y: 110) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.424669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.537772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.537772e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 +TOTAL : 0.696909 sec +INFO: No Floating Point Exceptions have been reported + 1,363,054,761 cycles # 1.945 GHz + 1,990,224,700 instructions # 1.46 insn per cycle + 0.701261631 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2251) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.424749e-01 +Avg ME (F77/C++) = 0.14247482643254802 +Relative difference = 5.163537715318965e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index d954d137a8..8c3e307fe5 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:52:12 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:59 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.548876e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.878752e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.890800e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.428405 sec -INFO: No Floating Point Exceptions have been reported - 1,043,110,697 cycles:u # 2.414 GHz (75.87%) - 2,510,213 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.40%) - 10,356,025 stalled-cycles-backend:u # 0.99% backend cycles idle (75.48%) - 1,549,103,394 instructions:u # 1.49 insn per cycle - # 0.01 stalled cycles per insn (74.97%) - 0.488419891 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.189379e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.854347e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.468984e+07 ) sec^-1 +MeanMatrixElemValue = 
( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.532180 sec +INFO: No Floating Point Exceptions have been reported + 2,219,216,234 cycles # 2.899 GHz + 3,174,009,870 instructions # 1.43 insn per cycle + 0.825106849 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.291565e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.341074e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.341074e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.761272 sec -INFO: No Floating Point Exceptions have been reported - 16,413,565,928 cycles:u # 3.439 GHz (74.91%) - 9,168,673 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 2,000,498,527 stalled-cycles-backend:u # 12.19% backend cycles idle (75.02%) - 51,616,234,124 instructions:u # 3.14 insn per cycle - # 0.04 stalled cycles per insn (75.03%) - 4.777715028 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 746) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.678393e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.716890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.716890e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.356728 sec +INFO: No Floating Point Exceptions have been reported + 19,323,098,467 cycles # 3.038 GHz + 51,924,439,414 instructions # 2.69 insn per cycle + 6.362461259 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.901907e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.055059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.055059e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.870038 sec -INFO: No Floating Point Exceptions have been reported - 9,748,609,775 cycles:u # 3.383 GHz (75.02%) - 9,234,368 stalled-cycles-frontend:u # 0.09% frontend cycles idle (75.02%) - 3,080,077,738 stalled-cycles-backend:u # 31.60% backend cycles idle (75.02%) - 30,688,640,376 instructions:u # 3.15 insn per cycle - # 0.10 stalled cycles per insn (75.04%) - 2.886309143 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2833) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.021374e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.160318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160318e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.578860 sec +INFO: No Floating Point Exceptions have been reported + 10,923,994,538 cycles # 3.048 GHz + 30,795,051,014 instructions # 2.82 insn per cycle + 3.584731673 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2915) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.969814e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.446981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.446981e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.686665 sec -INFO: No Floating Point Exceptions have been reported - 5,601,471,902 cycles:u # 3.298 GHz (75.08%) - 8,267,317 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.04%) - 1,288,835,229 stalled-cycles-backend:u # 23.01% backend cycles idle (75.04%) - 13,373,121,064 instructions:u # 2.39 insn per cycle - # 0.10 stalled cycles per insn (75.04%) - 1.702912365 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2817) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.869937e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.224318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.224318e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.261390 sec +INFO: No Floating Point Exceptions have been reported + 6,498,269,514 cycles # 2.867 GHz + 13,665,834,043 instructions # 2.10 insn per cycle + 2.267304210 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.324016e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.747508e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.747508e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.078273 sec +INFO: No Floating Point Exceptions have been reported + 5,947,948,769 cycles # 2.855 GHz + 13,008,169,729 instructions # 2.19 insn per cycle + 2.084199816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.663058e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.855570e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.855570e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.970789 sec +INFO: No Floating Point Exceptions have been reported + 5,847,713,634 cycles # 1.965 GHz + 8,587,473,758 instructions # 1.47 insn per cycle + 2.976683697 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1946) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 8904cc9c5f..70b1342c04 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:52:25 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:25:25 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.647769e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.014068e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.027873e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.409756 sec -INFO: No Floating Point Exceptions have been reported - 1,010,035,303 cycles:u # 2.364 GHz (75.46%) - 2,553,485 stalled-cycles-frontend:u # 0.25% frontend cycles idle (75.92%) - 7,861,623 stalled-cycles-backend:u # 0.78% backend cycles idle (75.02%) - 1,595,464,135 instructions:u # 1.58 insn per cycle - # 0.00 stalled cycles per insn (74.81%) - 0.471662306 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.145206e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.750029e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.339208e+07 ) sec^-1 +MeanMatrixElemValue = 
( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.539827 sec +INFO: No Floating Point Exceptions have been reported + 2,187,035,010 cycles # 2.816 GHz + 3,118,040,099 instructions # 1.43 insn per cycle + 0.835459641 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 -Avg ME (F77/GPU) = 4.3134710926110271 -Relative difference = 2.1036162350152416e-07 +Avg ME (F77/GPU) = 4.3134710926110280 +Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] 
[inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.373515e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.427207e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.427207e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.601523 sec -INFO: No Floating Point Exceptions have been reported - 15,839,223,004 cycles:u # 3.433 GHz (74.93%) - 9,992,371 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.01%) - 238,179,835 stalled-cycles-backend:u # 1.50% backend cycles idle (75.03%) - 49,868,612,389 instructions:u # 3.15 insn per cycle - # 0.00 stalled cycles per insn (75.03%) - 4.618020219 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.757288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800092e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800092e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.078460 sec +INFO: No Floating Point Exceptions have been reported + 18,383,455,963 cycles # 3.022 GHz + 50,054,891,477 instructions # 2.72 insn per cycle + 6.084475174 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.062465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.229594e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.229594e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.763609 sec -INFO: No Floating Point Exceptions have been reported - 9,381,584,870 cycles:u # 3.380 GHz (74.92%) - 8,842,331 stalled-cycles-frontend:u # 0.09% frontend cycles idle (74.95%) - 2,405,936,514 stalled-cycles-backend:u # 25.65% backend cycles idle (74.94%) - 29,354,889,379 instructions:u # 3.13 insn per cycle - # 0.08 stalled cycles per insn (74.92%) - 2.779716498 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2625) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.164998e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.317783e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.317783e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.422760 sec +INFO: No Floating Point Exceptions have been reported + 10,425,198,156 cycles # 3.042 GHz + 29,176,493,270 instructions # 2.80 insn per cycle + 3.428392442 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2733) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.036601e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.390743e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.390743e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.919623 sec -INFO: No Floating Point Exceptions have been reported - 6,461,957,078 cycles:u # 3.345 GHz (74.80%) - 9,210,517 stalled-cycles-frontend:u # 0.14% frontend cycles idle (74.76%) - 2,027,388,109 stalled-cycles-backend:u # 31.37% backend cycles idle (74.96%) - 15,191,337,244 instructions:u # 2.35 insn per cycle - # 0.13 stalled cycles per insn (75.15%) - 1.936614466 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3011) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.494730e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.797227e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.797227e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.439543 sec +INFO: No Floating Point Exceptions have been reported + 7,004,291,405 cycles # 2.865 GHz + 15,150,544,724 instructions # 2.16 insn per cycle + 2.445416331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3020) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.607457e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 4.924149e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.924149e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.384173 sec +INFO: No Floating Point Exceptions have been reported + 6,707,006,951 cycles # 2.807 GHz + 14,619,839,876 instructions # 2.18 insn per cycle + 2.390050397 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2621) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.451987e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.626148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.626148e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.147901 sec +INFO: No Floating Point Exceptions have been reported + 6,045,923,955 cycles # 1.918 GHz + 10,338,625,122 instructions # 1.71 insn per cycle + 3.153821789 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2129) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134710926107935 +Relative difference = 2.103616776553298e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index e7bcc40711..001e031ae4 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:52:38 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:25:51 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.943181e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.870847e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.897072e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 -TOTAL : 0.352477 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.625139e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.523370e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.621120e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.487063 sec INFO: No Floating Point Exceptions have been reported - 876,247,972 cycles:u # 2.390 GHz (76.01%) - 2,497,336 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.03%) - 7,944,281 stalled-cycles-backend:u # 0.91% backend cycles idle (75.58%) - 1,498,533,832 instructions:u # 1.71 insn per cycle - # 0.01 stalled 
cycles per insn (76.36%) - 0.408456697 seconds time elapsed + 2,103,765,597 cycles # 2.940 GHz + 3,010,989,522 instructions # 1.43 insn per cycle + 0.772591402 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525361867622 -Relative difference = 6.615515935930387e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.542954e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.605348e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.605348e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.270864 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 14,749,563,788 cycles:u # 3.447 GHz (74.95%) - 17,056,430 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.87%) - 2,639,824,270 stalled-cycles-backend:u # 17.90% backend cycles idle (74.90%) - 51,559,248,161 instructions:u # 3.50 insn per cycle - # 0.05 stalled 
cycles per insn (75.08%) - 4.282924101 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 723) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.742643e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.785190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.785190e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.103332 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,609,905,827 cycles # 3.047 GHz + 51,215,063,345 instructions # 2.75 insn per cycle + 6.108967968 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,36 +104,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737704578787 -Relative difference = 5.321390598852464e-08 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.744129e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.077428e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.077428e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.971346 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,687,741,511 cycles:u # 3.379 GHz (74.94%) - 11,735,458 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.94%) - 2,605,727,205 stalled-cycles-backend:u # 38.96% backend cycles idle (74.94%) - 18,683,455,679 instructions:u # 2.79 insn per cycle - # 0.14 stalled cycles per insn (74.97%) - 1.983304528 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3319) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.182136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.464848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.464848e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.593631 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,948,906,401 cycles # 3.059 GHz + 19,317,685,979 instructions # 2.43 insn per cycle + 2.599267681 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,36 +138,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733226081356 -Relative difference = 7.478907526568244e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.129095e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.256231e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.256231e+06 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.074921 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,554,292,535 cycles:u # 3.282 GHz (74.93%) - 6,570,022 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.89%) - 1,124,556,780 stalled-cycles-backend:u # 31.64% backend cycles idle (74.89%) - 8,625,582,750 instructions:u # 2.43 insn per cycle - # 0.13 stalled cycles per insn (74.90%) - 1.087177668 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3600) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.171182e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.241251e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.241251e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.368181 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,965,754,508 cycles # 2.888 GHz + 8,832,724,394 instructions # 2.23 insn per cycle + 1.373877553 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,16 +172,78 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650658514351 -Relative difference = 1.526612799754012e-08 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 8.610704e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.814571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.814571e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.302060 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,747,639,043 cycles # 2.867 GHz + 8,431,545,053 instructions # 2.25 insn per cycle + 1.307700074 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3541) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.347091e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.938350e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.938350e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.737189 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 3,508,553,237 cycles # 2.014 GHz + 
6,243,454,205 instructions # 1.78 insn per cycle + 1.742932448 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2325) (512y: 22) (512z: 2290) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index f3beef6e21..07d75bc161 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt 
@@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:52:49 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:26:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.293817e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.590857e+07 ) sec^-1 
-EvtsPerSec[MECalcOnly] (3a) = ( 4.628069e+07 ) sec^-1 -MeanMatrixElemValue = ( 6.834176e+00 +- 1.462500e-01 ) GeV^0 -TOTAL : 0.356321 sec +EvtsPerSec[Rmb+ME] (23) = ( 7.885122e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.628871e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.741563e+08 ) sec^-1 +MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 +TOTAL : 0.487946 sec INFO: No Floating Point Exceptions have been reported - 853,436,101 cycles:u # 2.315 GHz (73.39%) - 2,358,095 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.58%) - 12,115,732 stalled-cycles-backend:u # 1.42% backend cycles idle (74.46%) - 1,575,446,030 instructions:u # 1.85 insn per cycle - # 0.01 stalled cycles per insn (73.75%) - 0.414967357 seconds time elapsed + 2,087,121,908 cycles # 2.910 GHz + 3,019,371,370 instructions # 1.45 insn per cycle + 0.773659070 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 4.313524e+00 -Avg ME (F77/GPU) = 4.3135525361867622 -Relative difference = 6.615515935930387e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 4.313490e+00 +Avg ME (F77/GPU) = 4.3136695491848513 +Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.718331e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.788416e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.788416e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 4.005246 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 13,835,421,219 cycles:u # 3.448 GHz (74.90%) - 17,069,198 stalled-cycles-frontend:u # 0.12% frontend cycles idle (74.96%) - 357,948,952 stalled-cycles-backend:u # 2.59% backend cycles idle (75.06%) - 49,471,917,423 instructions:u # 3.58 insn per cycle - # 0.01 stalled cycles per insn (75.08%) - 4.017265807 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 614) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.770821e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.815512e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815512e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 6.006875 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 18,030,716,622 cycles # 2.999 GHz + 49,602,013,092 instructions # 2.75 insn per cycle + 6.012632180 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The 
following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,36 +104,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 -Avg ME (F77/C++) = 4.3135737704578787 -Relative difference = 5.321390598852464e-08 +Avg ME (F77/C++) = 4.3135738277342170 +Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.816066e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.284665e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.284665e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.315915e+00 +- 1.953829e-01 ) GeV^0 -TOTAL : 1.684957 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,703,146,096 cycles:u # 3.370 GHz (74.86%) - 12,386,880 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.95%) - 1,730,264,085 stalled-cycles-backend:u # 30.34% backend cycles idle (74.95%) - 18,193,557,266 instructions:u # 3.19 insn per cycle - # 0.10 stalled cycles per insn (74.96%) - 1.696865901 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 3078) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.661063e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.005931e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.005931e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 +TOTAL : 2.335528 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,124,417,342 cycles # 3.044 GHz + 18,533,238,890 instructions # 2.60 insn per cycle + 2.341180166 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3252) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,36 +138,33 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -Avg ME (C++/C++) = 4.313573e+00 -Avg ME (F77/C++) = 4.3135733226081356 -Relative difference = 7.478907526568244e-08 +Avg ME (C++/C++) = 4.313572e+00 +Avg ME (F77/C++) = 4.3135722697479650 +Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.399416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.080995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.080995e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.289197e+00 +- 1.809101e-01 ) GeV^0 -TOTAL : 1.394508 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,683,415,974 cycles:u # 3.340 GHz (74.94%) - 7,993,120 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.90%) - 1,817,506,238 stalled-cycles-backend:u # 38.81% backend cycles idle (74.90%) - 10,765,447,899 instructions:u # 2.30 insn per cycle - # 0.17 stalled cycles per insn (74.90%) - 1.406990317 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4259) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.555350e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.026882e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.026882e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.973614 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,639,444,254 cycles # 2.850 GHz + 10,848,081,116 instructions # 1.92 insn per cycle + 1.979248695 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,18 +172,82 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 -Avg ME (F77/C++) = 4.3135650658514351 -Relative difference = 1.526612799754012e-08 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.687423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.182059e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.182059e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 1.928080 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 5,565,851,949 cycles # 2.880 GHz + 10,551,069,876 instructions # 1.90 insn per cycle + 1.933684179 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4138) (512y: 12) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313565e+00 +Avg ME (F77/C++) = 4.3135645242873579 +Relative difference = 1.1028294269894893e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.666673e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.977886e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.977886e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 +TOTAL : 2.332019 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 4,637,687,168 cycles # 1.985 GHz + 8,659,128,272 instructions # 1.87 insn per cycle + 2.337748946 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313564e+00 +Avg ME (F77/C++) = 4.3135643536224961 +Relative difference = 8.197919301304478e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 3651a68d0f..17ba5d04ac 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:52:59 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:26:35 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.549341e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.895244e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.907493e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.412135 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.145183e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.832777e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.435037e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.531018 sec INFO: No Floating Point Exceptions have been reported - 1,012,754,443 cycles:u # 2.356 GHz (76.22%) - 2,315,059 stalled-cycles-frontend:u # 0.23% frontend cycles idle (76.85%) - 7,225,280 stalled-cycles-backend:u # 0.71% backend cycles idle (74.91%) - 1,673,431,305 instructions:u # 1.65 insn per cycle - # 0.00 stalled 
cycles per insn (73.79%) - 0.475036466 seconds time elapsed + 2,261,745,252 cycles # 2.959 GHz + 3,218,464,294 instructions # 1.42 insn per cycle + 0.823443286 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 
4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.270757e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.319460e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.319460e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.802290 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 16,546,095,737 cycles:u # 3.437 GHz (74.93%) - 31,931,509 stalled-cycles-frontend:u # 0.19% frontend cycles idle (74.91%) - 2,162,110,524 stalled-cycles-backend:u # 13.07% backend cycles idle (74.96%) - 51,706,306,670 instructions:u # 3.12 insn per cycle - # 0.04 stalled cycles per insn (75.04%) - 4.818986615 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 732) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.569215e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.602822e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.602822e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.791642 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 20,563,959,508 cycles # 3.026 GHz + 51,925,698,785 instructions # 2.53 insn per cycle + 6.797429254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -95,27 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.890360e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.044955e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.044955e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.876779 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,813,550,759 cycles:u # 3.397 GHz (74.86%) - 14,935,911 stalled-cycles-frontend:u # 0.15% frontend cycles idle (75.00%) - 3,058,985,000 stalled-cycles-backend:u # 31.17% backend cycles idle (75.08%) - 30,515,940,191 instructions:u # 3.11 insn per cycle - # 0.10 stalled cycles per insn (75.08%) - 
2.893065928 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2927) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.866433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.990571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990571e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.767439 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,513,295,665 cycles # 3.052 GHz + 30,592,567,538 instructions # 2.66 insn per cycle + 3.773601304 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2972) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -132,27 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.151810e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.659007e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.659007e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.647064 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,474,101,790 cycles:u # 3.300 GHz (74.93%) - 12,312,874 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.93%) - 1,329,742,792 stalled-cycles-backend:u # 24.29% backend cycles idle (74.96%) - 13,319,370,462 instructions:u # 2.43 insn per cycle - # 0.10 stalled cycles per insn (74.96%) - 1.663227013 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3019) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.729775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.061750e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.061750e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.323879 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,711,394,456 cycles # 2.882 GHz + 13,608,749,696 instructions # 2.03 insn per cycle + 2.329702373 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -169,9 +181,73 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.169662e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.568966e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.568966e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.135490 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,181,875,885 cycles # 2.888 GHz + 12,975,632,555 instructions # 2.10 insn per cycle + 2.141464236 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.298256e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.453472e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.453472e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.288067 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,406,126,196 cycles # 1.946 GHz + 8,701,338,330 instructions # 1.36 insn per cycle + 3.294025783 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2014) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt 
b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt index 100ace0fa7..2ae9588cbc 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-10-04_11:53:13 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:27:02 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.640738e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.025699e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.039692e+07 ) sec^-1 -MeanMatrixElemValue = ( 7.088120e+00 +- 1.629041e-01 ) GeV^0 -TOTAL : 0.408456 sec +EvtsPerSec[Rmb+ME] (23) = ( 4.150402e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.856906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.454476e+07 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 0.526172 sec INFO: No Floating Point Exceptions have been reported - 1,044,567,514 cycles:u # 2.447 GHz (76.45%) - 2,397,919 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.70%) - 5,774,628 stalled-cycles-backend:u # 0.55% backend cycles idle (73.30%) - 1,595,969,251 instructions:u # 1.53 insn per cycle - # 0.00 stalled 
cycles per insn (73.73%) - 0.471349867 seconds time elapsed + 2,295,452,706 cycles # 2.993 GHz + 3,307,765,060 instructions # 1.44 insn per cycle + 0.824169356 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 
4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.410893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.467452e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.467452e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 4.532366 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 15,572,968,721 cycles:u # 3.427 GHz (75.01%) - 31,406,791 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.00%) - 49,131,449 stalled-cycles-backend:u # 0.32% backend cycles idle (75.01%) - 49,902,625,148 instructions:u # 3.20 insn per cycle - # 0.00 stalled cycles per insn (75.01%) - 4.549108797 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 652) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.671429e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.710309e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.710309e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 6.383632 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 19,535,242,963 cycles # 3.058 GHz + 49,954,649,142 instructions # 2.56 insn per cycle + 6.389286053 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -86,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -95,27 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.990717e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.154860e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.154860e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 2.810554 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 9,549,397,093 cycles:u # 3.383 GHz (74.95%) - 15,707,127 stalled-cycles-frontend:u # 0.16% frontend cycles idle (75.06%) - 1,948,445,300 stalled-cycles-backend:u # 20.40% backend cycles idle (75.06%) - 28,971,717,461 instructions:u # 3.03 insn per cycle - # 0.07 stalled cycles per insn (75.06%) - 
2.827859944 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2723) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.974616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.107062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.107062e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.633598 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 11,048,626,108 cycles # 3.037 GHz + 29,139,783,516 instructions # 2.64 insn per cycle + 3.639341681 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2815) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -123,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -132,27 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_HEFT_GG_BBX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.896446e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.233456e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.233456e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.211102e+00 +- 1.606204e-01 ) GeV^0 -TOTAL : 1.961732 sec -INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 6,573,075,125 cycles:u # 3.330 GHz (74.88%) - 18,540,499 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.91%) - 2,224,016,527 stalled-cycles-backend:u # 33.84% backend cycles idle (74.90%) - 15,037,369,471 instructions:u # 2.29 insn per cycle - # 0.15 stalled cycles per insn (74.88%) - 1.978117739 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.862780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.086642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.086642e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.826812 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 8,110,327,392 cycles # 2.866 GHz + 15,189,804,265 instructions # 1.87 insn per cycle + 2.832751384 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3203) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -160,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -169,9 +181,73 @@ Avg ME (F77/C++) = 4.3134712319139954 Relative difference = 1.7806676491157786e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.093395e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.337729e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.337729e+05 ) sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 2.668875 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 7,696,372,242 cycles # 2.878 GHz + 14,484,401,690 instructions # 1.88 insn per cycle + 2.674814198 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2775) (512y: 304) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.225341e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.377311e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.377311e+05 ) 
sec^-1 +MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 +TOTAL : 3.360677 sec +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW + 6,555,331,117 cycles # 1.948 GHz + 9,892,801,123 instructions # 1.51 insn per cycle + 3.366641015 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2216) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW +Avg ME (C++/C++) = 4.313472e+00 +Avg ME (F77/C++) = 4.3134712319139954 +Relative difference = 1.7806676491157786e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index a827ba6b8b..31ad35f4d6 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:51:39 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:23:54 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.582456e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.122819e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.124654e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.432968 sec -INFO: No Floating Point Exceptions have been reported - 1,090,146,119 cycles:u # 2.686 GHz (75.76%) - 2,302,256 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.88%) - 6,356,529 stalled-cycles-backend:u # 0.58% backend cycles idle (76.51%) - 1,570,621,288 instructions:u # 1.44 insn per cycle - # 0.00 stalled cycles per insn (76.59%) - 0.488099481 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.769640e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.787416e+04 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 2.790414e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.468036 sec +INFO: No Floating Point Exceptions have been reported + 2,037,551,034 cycles # 2.955 GHz + 2,992,853,394 instructions # 1.47 insn per cycle + 0.746736203 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.109493e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.286013e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.286503e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.420410 sec -INFO: No Floating Point Exceptions have been reported - 1,200,943,623 cycles:u # 2.786 GHz (74.71%) - 2,519,175 stalled-cycles-frontend:u # 0.21% frontend cycles idle (73.61%) - 6,893,773 
stalled-cycles-backend:u # 0.57% backend cycles idle (75.00%) - 1,703,958,868 instructions:u # 1.42 insn per cycle - # 0.00 stalled cycles per insn (75.05%) - 0.468305523 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.955252e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.072819e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.081098e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.486910 sec +INFO: No Floating Point Exceptions have been reported + 2,053,456,592 cycles # 2.899 GHz + 3,023,614,282 instructions # 1.47 insn per cycle + 0.768139647 seconds time elapsed ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.139544e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.144887e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.144887e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.104786 sec -INFO: No Floating Point Exceptions have been reported - 371,842,346 cycles:u # 3.460 GHz (72.72%) - 29,514 stalled-cycles-frontend:u # 0.01% frontend cycles idle (70.57%) - 40,691,461 stalled-cycles-backend:u # 10.94% backend cycles idle (71.98%) - 1,347,611,870 instructions:u # 3.62 insn per cycle - # 0.03 stalled cycles per insn (75.70%) - 0.111859915 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1627) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.556594e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.560204e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.560204e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.150865 sec +INFO: No Floating Point Exceptions have been reported + 468,041,301 cycles # 3.038 GHz + 1,389,874,591 instructions # 2.97 insn per cycle + 0.154561545 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.003049e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.005201e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005201e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.054728 sec -INFO: No Floating Point Exceptions have been reported - 192,768,732 cycles:u # 3.365 GHz (73.34%) - 31,385 stalled-cycles-frontend:u # 0.02% frontend cycles idle (72.15%) - 21,025,011 stalled-cycles-backend:u # 10.91% backend cycles idle (72.15%) - 662,523,571 instructions:u # 3.44 insn per cycle - # 0.03 stalled cycles per insn (72.15%) - 0.061486153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8749) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.755475e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.769207e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.769207e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.080074 sec +INFO: No Floating Point Exceptions have been reported + 240,347,702 cycles # 2.886 GHz + 693,020,093 instructions # 2.88 insn per cycle + 0.083834683 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9482) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause 
SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.073377e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.082771e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.082771e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.027370 sec -INFO: No Floating Point Exceptions have been reported - 90,359,469 cycles:u # 3.007 GHz (73.90%) - 75,752 stalled-cycles-frontend:u # 0.08% frontend cycles idle (73.52%) - 11,570,982 stalled-cycles-backend:u # 12.81% backend cycles idle (73.52%) - 233,290,158 instructions:u # 2.58 insn per cycle - # 0.05 stalled cycles per insn (73.52%) - 0.034043713 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7869) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.470546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.476392e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.476392e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037947 sec +INFO: No Floating Point Exceptions have been reported + 113,951,288 cycles # 2.767 GHz + 257,914,170 instructions # 2.26 insn per cycle + 0.041775140 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8501) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.587475e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.594909e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.594909e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.035255 sec +INFO: No Floating Point Exceptions have been reported + 102,623,828 cycles # 2.666 GHz + 240,025,776 instructions # 2.34 insn per cycle + 0.039073005 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8143) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.268803e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.274169e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.274169e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 +TOTAL : 0.043872 sec +INFO: No Floating Point Exceptions have been reported + 90,257,947 cycles # 1.910 GHz + 134,303,865 instructions # 1.49 insn per cycle + 0.047785620 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7086) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index e9d19cd062..520fc6d267 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:51:45 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:05 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.854502e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.456024e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.457831e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.389807 sec -INFO: No Floating Point Exceptions have been reported - 1,103,828,674 cycles:u # 2.767 GHz (74.97%) - 2,466,335 stalled-cycles-frontend:u # 0.22% frontend cycles idle (76.13%) - 5,284,848 stalled-cycles-backend:u # 0.48% backend cycles idle (75.35%) - 1,547,668,644 instructions:u # 1.40 insn per cycle - # 0.00 stalled cycles per insn (75.31%) - 0.444109511 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.800320e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.818517e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.821599e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470187 sec +INFO: No Floating Point Exceptions have been reported + 2,052,814,472 cycles # 2.969 GHz + 2,949,612,457 instructions # 1.44 insn per cycle + 0.750557916 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.131911e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.312774e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.313271e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.418749 sec -INFO: No Floating Point Exceptions have been reported - 1,170,737,076 cycles:u # 2.725 GHz (76.02%) - 2,504,950 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.22%) - 5,156,359 stalled-cycles-backend:u # 0.44% backend cycles idle (73.53%) - 1,648,323,219 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (73.44%) - 0.472468324 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.127619e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.255846e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.264216e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483932 sec +INFO: No Floating Point Exceptions have been reported + 2,088,813,579 cycles # 2.962 GHz + 3,090,582,596 instructions # 1.48 insn per cycle + 0.765249817 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562860176587E-006 -Relative difference = 3.3392753387325367e-07 +Avg ME (F77/GPU) = 8.1274562860176604E-006 +Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.152238e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.158063e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.158063e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.104212 sec -INFO: No Floating Point Exceptions have been reported - 369,886,605 cycles:u # 3.461 GHz (69.04%) - 34,788 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.29%) - 43,914,996 stalled-cycles-backend:u # 11.87% backend cycles idle (76.03%) - 1,330,155,157 instructions:u # 3.60 insn per cycle - # 0.03 stalled cycles per insn (77.58%) - 0.111616153 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1597) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.583197e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.586632e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.586632e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.148844 sec +INFO: No Floating Point Exceptions have been reported + 465,656,480 cycles # 3.065 GHz + 
1,385,063,684 instructions # 2.97 insn per cycle + 0.152528488 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.908546e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.928912e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.928912e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.054799 sec -INFO: No Floating Point Exceptions have been reported - 192,144,173 cycles:u # 3.348 GHz (73.26%) - 27,034 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.20%) - 19,808,579 stalled-cycles-backend:u # 10.31% backend cycles idle (72.19%) - 659,238,962 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (72.19%) - 0.062052454 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8794) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.701779e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.714329e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.714329e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.079935 sec +INFO: No Floating Point Exceptions have been reported + 238,338,142 cycles # 2.869 
GHz + 689,077,380 instructions # 2.89 insn per cycle + 0.083658919 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9525) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.137277e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.146846e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.146846e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026043 sec -INFO: No Floating Point Exceptions have been reported - 86,694,030 cycles:u # 3.023 GHz (72.65%) - 23,484 stalled-cycles-frontend:u # 0.03% frontend cycles idle (72.26%) - 8,835,392 stalled-cycles-backend:u # 10.19% backend cycles idle (72.26%) - 231,252,295 instructions:u # 2.67 insn per cycle - # 0.04 stalled cycles per insn (72.26%) - 0.032914407 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7839) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.516138e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.522347e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.522347e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.036146 sec +INFO: No Floating Point Exceptions have been reported + 111,533,372 cycles # 2.836 GHz 
+ 253,485,212 instructions # 2.27 insn per cycle + 0.039854413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8457) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.619024e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.626212e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.626212e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.033802 sec +INFO: No Floating Point Exceptions have been reported + 100,180,790 cycles # 2.704 GHz + 235,622,302 instructions # 2.35 insn per cycle + 0.037533375 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8101) (512y: 150) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.260779e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.266519e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.266519e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 
) GeV^-4 +TOTAL : 0.043311 sec +INFO: No Floating Point Exceptions have been reported + 88,103,069 cycles # 1.888 GHz + 129,731,242 instructions # 1.47 insn per cycle + 0.047213046 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7084) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274562860174791E-006 +Relative difference = 3.3392755596761116e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 8c49ada640..5ff76d67ba 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:51:50 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:16 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.132723e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.300853e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.301412e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 -TOTAL : 0.360615 sec -INFO: No Floating Point Exceptions have been reported - 1,000,731,193 cycles:u # 2.709 GHz (74.29%) - 2,424,404 stalled-cycles-frontend:u # 0.24% frontend cycles idle (76.12%) - 5,293,783 stalled-cycles-backend:u # 0.53% backend cycles idle (76.52%) - 1,461,513,671 instructions:u # 1.46 insn per cycle - # 0.00 stalled cycles per insn (76.19%) - 0.408059445 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.211219e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.220457e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.222410e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.474415 sec +INFO: No Floating Point Exceptions have been reported + 2,042,215,104 cycles # 2.959 GHz + 2,967,666,575 instructions # 1.45 insn per cycle + 0.749013771 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.806295e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.371717e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.373128e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 -TOTAL : 0.380381 sec -INFO: No Floating Point Exceptions have been reported - 1,039,678,236 cycles:u # 2.656 GHz (75.58%) - 2,424,013 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.54%) - 6,246,846 stalled-cycles-backend:u # 0.60% backend cycles idle (76.28%) - 1,582,406,209 instructions:u # 1.52 insn per cycle - # 0.00 stalled cycles per insn (76.05%) - 0.431428242 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.889452e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.983579e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.991978e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 +TOTAL : 0.474378 sec +INFO: No Floating Point Exceptions have been reported + 2,044,733,349 cycles # 2.963 GHz + 2,989,289,340 instructions # 1.46 insn per cycle + 0.749063185 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127375e-06 -Avg ME (F77/GPU) = 8.1275160277913510E-006 -Relative difference = 1.735219444797551e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272869669930272E-006 +Relative difference = 4.548524165778887e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.299642e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.305836e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.305836e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.101767 sec -INFO: No Floating Point Exceptions have been reported - 361,295,005 cycles:u # 3.459 GHz (72.61%) - 24,284 stalled-cycles-frontend:u # 0.01% frontend cycles idle (70.61%) - 45,702,302 stalled-cycles-backend:u # 12.65% backend cycles idle (73.41%) - 1,324,193,787 instructions:u # 3.67 insn per cycle - # 0.03 stalled cycles per insn (77.06%) - 0.109508489 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1635) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.559321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.562914e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562914e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 
6.554185e-04 ) GeV^-4 +TOTAL : 0.150601 sec +INFO: No Floating Point Exceptions have been reported + 464,247,537 cycles # 3.020 GHz + 1,382,106,488 instructions # 2.98 insn per cycle + 0.154369193 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278101435899343E-006 -Relative difference = 1.76664974860306e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.869110e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.877448e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.877448e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.030178 sec -INFO: No Floating Point Exceptions have been reported - 101,110,114 cycles:u # 3.084 GHz (76.07%) - 26,656 stalled-cycles-frontend:u # 0.03% frontend cycles idle (75.72%) - 14,337,366 stalled-cycles-backend:u # 14.18% backend cycles idle (75.72%) - 343,617,527 instructions:u # 3.40 insn per cycle - # 0.04 stalled cycles per insn (75.72%) - 0.037469160 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9270) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.252858e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.257505e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.257505e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.044283 sec +INFO: No Floating Point Exceptions have been reported + 132,985,054 cycles # 2.803 GHz + 372,125,739 instructions # 2.80 insn per cycle + 0.048041967 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071402353976E-006 -Relative difference = 1.725378052944308e-08 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.107990e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.147875e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.147875e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.014681 sec -INFO: No Floating Point Exceptions have been reported - 54,299,137 cycles:u # 3.141 GHz (64.92%) - 14,620 stalled-cycles-frontend:u # 0.03% frontend cycles idle (54.79%) - 3,861,986 stalled-cycles-backend:u # 7.11% backend cycles idle (54.13%) - 123,494,904 instructions:u # 2.27 insn per cycle - # 0.03 stalled cycles per 
insn (60.44%) - 0.021288975 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8628) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.855200e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.879676e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879676e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.020512 sec +INFO: No Floating Point Exceptions have been reported + 65,226,143 cycles # 2.754 GHz + 142,813,798 instructions # 2.19 insn per cycle + 0.024211039 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9241) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275351122593251E-006 -Relative difference = 1.3812222848044195e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.108853e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.137651e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.137651e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.019016 sec +INFO: No Floating Point Exceptions have been reported + 61,573,217 cycles # 2.773 GHz + 132,819,685 instructions # 2.16 insn per cycle + 0.022685850 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8959) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.385538e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.406562e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.406562e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 +TOTAL : 0.024358 sec +INFO: No Floating Point Exceptions have been reported + 53,055,109 cycles # 1.895 GHz + 79,577,124 instructions # 1.50 insn per cycle + 0.028648864 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2836) (512y: 30) (512z: 7437) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index 65e785a100..662cc2f451 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:51:55 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:27 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.143836e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.307334e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.307896e+04 ) sec^-1 -MeanMatrixElemValue = ( 3.100225e-04 +- 2.256521e-04 ) GeV^-4 -TOTAL : 0.380148 sec -INFO: No Floating Point Exceptions have been reported - 1,009,847,531 cycles:u # 2.736 GHz (72.73%) - 2,537,228 stalled-cycles-frontend:u # 0.25% frontend cycles idle (73.78%) - 5,245,061 stalled-cycles-backend:u # 0.52% backend cycles idle (76.61%) - 1,458,641,375 instructions:u # 1.44 insn per cycle - # 0.00 stalled cycles per insn (76.62%) - 0.431707059 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.235162e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.244960e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246839e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 +TOTAL : 0.477065 sec +INFO: No Floating Point Exceptions have been reported + 2,025,818,805 cycles # 2.919 GHz + 2,939,784,013 instructions # 1.45 insn per cycle + 0.752407839 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.816620e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.417499e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.418878e+05 ) sec^-1 -MeanMatrixElemValue = ( 7.043589e-02 +- 5.707640e-02 ) GeV^-4 -TOTAL : 0.380664 sec -INFO: No Floating Point Exceptions have been reported - 1,061,615,308 cycles:u # 2.719 GHz (73.75%) - 2,499,819 stalled-cycles-frontend:u # 0.24% frontend cycles idle (74.06%) - 6,189,233 stalled-cycles-backend:u # 0.58% backend cycles idle (75.34%) - 1,572,709,866 instructions:u # 1.48 insn per cycle - # 0.00 stalled cycles per insn (75.31%) - 0.428586389 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.112799e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.201470e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.209428e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 +TOTAL : 0.472481 sec +INFO: No Floating Point Exceptions have been reported + 2,041,894,086 cycles # 2.955 GHz + 2,946,838,758 instructions # 1.44 insn per cycle + 0.748409052 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 8.127375e-06 -Avg ME (F77/GPU) = 8.1275164883853706E-006 -Relative difference = 1.740886637704508e-05 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 8.127250e-06 +Avg ME (F77/GPU) = 8.1272866419447706E-006 +Relative difference = 4.508529302013153e-06 OK (relative difference <= 5E-3) ========================================================================= -runExe 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.297453e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.303599e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.303599e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.274747e-01 +- 1.272814e-01 ) GeV^-4 -TOTAL : 0.101240 sec -INFO: No Floating Point Exceptions have been reported - 352,808,274 cycles:u # 3.397 GHz (72.95%) - 29,031 stalled-cycles-frontend:u # 0.01% frontend cycles idle (76.41%) - 40,275,340 stalled-cycles-backend:u # 11.42% backend cycles idle (76.92%) - 1,323,447,323 instructions:u # 3.75 insn per cycle - # 0.03 stalled cycles per insn (76.92%) - 0.108960973 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1608) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.524192e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.527540e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.527540e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.177153e-04 +- 
6.554185e-04 ) GeV^-4 +TOTAL : 0.151291 sec +INFO: No Floating Point Exceptions have been reported + 467,037,767 cycles # 3.023 GHz + 1,376,809,181 instructions # 2.95 insn per cycle + 0.154965126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127810e-06 -Avg ME (F77/C++) = 8.1278101435899343E-006 -Relative difference = 1.76664974860306e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127811e-06 +Avg ME (F77/C++) = 8.1278105271212486E-006 +Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.906770e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.914671e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914671e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.274746e-01 +- 1.272813e-01 ) GeV^-4 -TOTAL : 0.029000 sec -INFO: No Floating Point Exceptions have been reported - 98,322,942 cycles:u # 3.114 GHz (75.15%) - 21,620 stalled-cycles-frontend:u # 0.02% frontend cycles idle (74.79%) - 14,757,264 stalled-cycles-backend:u # 15.01% backend cycles idle (74.79%) - 343,482,210 instructions:u # 3.49 insn per cycle - # 0.04 stalled cycles per insn (74.79%) - 0.036350431 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 9253) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 
1.250589e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.254973e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.254973e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 +TOTAL : 0.043394 sec +INFO: No Floating Point Exceptions have been reported + 130,510,666 cycles # 2.799 GHz + 367,293,969 instructions # 2.81 insn per cycle + 0.047185544 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127807e-06 -Avg ME (F77/C++) = 8.1278071402353976E-006 -Relative difference = 1.725378052944308e-08 +cmpExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127809e-06 +Avg ME (F77/C++) = 8.1278090510674588E-006 +Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.090520e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.126703e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.126703e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.275185e-01 +- 1.273251e-01 ) GeV^-4 -TOTAL : 0.014211 sec -INFO: No Floating Point Exceptions have been reported - 52,537,622 cycles:u # 3.131 GHz (64.84%) - 12,697 stalled-cycles-frontend:u # 0.02% frontend cycles idle (53.43%) - 4,440,172 stalled-cycles-backend:u # 8.45% backend cycles idle (52.77%) - 122,003,851 instructions:u # 2.32 insn per cycle - # 0.04 stalled cycles per 
insn (61.17%) - 0.021303380 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8595) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.895966e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.919717e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.919717e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.019526 sec +INFO: No Floating Point Exceptions have been reported + 63,088,654 cycles # 2.773 GHz + 138,078,009 instructions # 2.19 insn per cycle + 0.023227465 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9196) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 8.127535e-06 -Avg ME (F77/C++) = 8.1275351122593251E-006 -Relative difference = 1.3812222848044195e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 
11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.167323e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.196847e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.196847e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 +TOTAL : 0.017922 sec +INFO: No Floating Point Exceptions have been reported + 58,004,801 cycles # 2.745 GHz + 127,991,431 instructions # 2.21 insn per cycle + 0.021624106 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8910) (512y: 28) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275366216540664E-006 +Relative difference = 4.655111786058001e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.372680e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.393901e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.393901e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 
) GeV^-4 +TOTAL : 0.023632 sec +INFO: No Floating Point Exceptions have been reported + 50,117,827 cycles # 1.863 GHz + 74,764,014 instructions # 1.49 insn per cycle + 0.027462672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2791) (512y: 30) (512z: 7439) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127537e-06 +Avg ME (F77/C++) = 8.1275369863475849E-006 +Relative difference = 1.6797726498700304e-09 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index c74dc823ad..2860254d4c 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:52:00 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:37 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.619421e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.149565e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.151432e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.416231 sec -INFO: No Floating Point Exceptions have been reported - 1,112,118,022 cycles:u # 2.751 GHz (75.68%) - 2,293,715 stalled-cycles-frontend:u # 0.21% frontend cycles idle (76.54%) - 11,422,495 stalled-cycles-backend:u # 1.03% backend cycles idle (76.29%) - 1,549,747,180 instructions:u # 1.39 insn per cycle - # 0.01 stalled cycles per insn (76.39%) - 0.464311808 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.738978e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.756587e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.759630e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.470308 sec +INFO: No Floating Point Exceptions have been reported + 2,029,517,703 cycles # 2.933 GHz + 2,946,537,029 instructions # 1.45 insn per cycle + 0.750454094 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.124077e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.299484e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.300004e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.441718 sec -INFO: No Floating Point Exceptions have been reported - 1,258,352,274 cycles:u # 2.790 GHz (74.97%) - 2,486,955 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.14%) - 5,885,059 stalled-cycles-backend:u # 0.47% backend cycles idle (74.68%) - 1,742,144,024 instructions:u # 1.38 insn per cycle - # 0.00 stalled cycles per insn (74.17%) - 0.491667431 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 6.975249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.105448e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.114521e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.483108 sec +INFO: No Floating Point Exceptions have been reported + 2,093,310,274 cycles # 2.962 GHz + 3,111,318,214 instructions # 1.49 insn per cycle + 0.763440898 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.199069e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.205010e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.205010e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.103842 sec -INFO: No Floating Point Exceptions have been reported - 364,350,368 cycles:u # 3.422 GHz (69.40%) - 39,775 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.04%) - 34,532,847 stalled-cycles-backend:u # 9.48% backend cycles idle (77.50%) - 1,339,623,327 instructions:u # 3.68 insn per cycle - # 0.03 stalled cycles per insn (77.49%) - 0.110807545 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1630) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.479369e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482863e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482863e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.153894 sec +INFO: No Floating Point Exceptions have been reported + 471,996,695 cycles # 3.005 GHz + 
1,398,458,325 instructions # 2.96 insn per cycle + 0.157639380 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.012681e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.015094e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.015094e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.054171 sec -INFO: No Floating Point Exceptions have been reported - 192,108,317 cycles:u # 3.383 GHz (73.86%) - 28,861 stalled-cycles-frontend:u # 0.02% frontend cycles idle (71.91%) - 22,278,155 stalled-cycles-backend:u # 11.60% backend cycles idle (71.90%) - 658,320,230 instructions:u # 3.43 insn per cycle - # 0.03 stalled cycles per insn (71.90%) - 0.061213813 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8728) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.817579e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.830221e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.830221e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.079435 sec +INFO: No Floating Point Exceptions have been reported + 237,264,825 cycles # 2.877 
GHz + 688,242,182 instructions # 2.90 insn per cycle + 0.083121228 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9334) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.136754e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.146316e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.146316e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026658 sec -INFO: No Floating Point Exceptions have been reported - 97,797,679 cycles:u # 3.344 GHz (68.20%) - 28,245 stalled-cycles-frontend:u # 0.03% frontend cycles idle (72.80%) - 10,553,550 stalled-cycles-backend:u # 10.79% backend cycles idle (72.79%) - 229,743,009 instructions:u # 2.35 insn per cycle - # 0.05 stalled cycles per insn (72.79%) - 0.033574814 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7892) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.469077e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.475276e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.475276e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.038002 sec +INFO: No Floating Point Exceptions have been reported + 113,713,809 cycles # 2.755 GHz 
+ 253,123,745 instructions # 2.23 insn per cycle + 0.041850302 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.615978e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.623720e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.623720e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.034706 sec +INFO: No Floating Point Exceptions have been reported + 101,196,884 cycles # 2.667 GHz + 233,657,279 instructions # 2.31 insn per cycle + 0.038483246 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.233700e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.238685e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.238685e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 +TOTAL : 0.045046 sec +INFO: No Floating Point Exceptions have been reported + 91,035,012 cycles # 1.880 GHz + 133,158,052 instructions # 1.46 insn per cycle + 0.048995485 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6354) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index eaf646f1b2..91c8760286 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ 
b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,67 +1,83 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-10-04_11:52:06 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:24:48 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.891944e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.503892e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.505792e+03 ) sec^-1 -MeanMatrixElemValue = ( 5.989810e-05 +- 3.867612e-05 ) GeV^-4 -TOTAL : 0.389977 sec -INFO: No Floating Point Exceptions have been reported - 1,080,122,392 cycles:u # 2.704 GHz (75.45%) - 2,305,877 stalled-cycles-frontend:u # 0.21% frontend cycles idle (75.49%) - 6,693,024 stalled-cycles-backend:u # 0.62% backend cycles idle (76.00%) - 1,564,769,595 instructions:u # 1.45 insn per cycle - # 0.00 stalled cycles per insn (76.45%) - 0.445401382 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 2.782094e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.800671e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.804051e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.471374 sec +INFO: No Floating Point Exceptions have been reported + 2,059,228,408 cycles # 2.969 GHz + 2,976,693,819 instructions # 1.45 insn per cycle + 0.751857693 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.144095e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.329788e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.330291e+05 ) sec^-1 -MeanMatrixElemValue = ( 3.402315e-01 +- 3.184905e-01 ) GeV^-4 -TOTAL : 0.421263 sec -INFO: No Floating Point Exceptions have been reported - 1,206,970,978 cycles:u # 2.804 GHz (74.04%) - 2,604,515 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.09%) - 6,058,522 stalled-cycles-backend:u # 0.50% backend cycles idle (74.66%) - 1,701,293,584 instructions:u # 1.41 insn per cycle - # 0.00 stalled cycles per insn (75.82%) - 0.474343055 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 7.066550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.182190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.190564e+05 ) sec^-1 +MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 +TOTAL : 0.485261 sec +INFO: No Floating Point Exceptions have been reported + 2,087,825,759 cycles # 2.964 GHz + 3,088,551,405 instructions # 1.48 insn per cycle + 0.765530482 seconds time elapsed ------------------------------------------------------------------------- -runTest 
/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -69,34 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 -Avg ME (F77/GPU) = 8.1274562879405183E-006 -Relative difference = 3.336909458255062e-07 +Avg ME (F77/GPU) = 8.1274562879405200E-006 +Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe 
-p 1 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.126453e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.132278e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.132278e+03 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.104730 sec -INFO: No Floating Point Exceptions have been reported - 362,909,682 cycles:u # 3.378 GHz (70.55%) - 34,800 stalled-cycles-frontend:u # 0.01% frontend cycles idle (72.02%) - 44,810,281 stalled-cycles-backend:u # 12.35% backend cycles idle (75.74%) - 1,342,031,847 instructions:u # 3.70 insn per cycle - # 0.03 stalled cycles per insn (77.69%) - 0.111618296 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1603) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.501790e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.505136e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.505136e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.152240 sec +INFO: No Floating Point Exceptions have been reported + 470,061,720 cycles # 3.025 GHz + 
1,393,763,209 instructions # 2.97 insn per cycle + 0.155889798 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -104,34 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.005583e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.007752e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.007752e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.053854 sec -INFO: No Floating Point Exceptions have been reported - 181,501,491 cycles:u # 3.216 GHz (71.73%) - 26,353 stalled-cycles-frontend:u # 0.01% frontend cycles idle (71.73%) - 22,628,933 stalled-cycles-backend:u # 12.47% backend cycles idle (71.73%) - 673,671,420 instructions:u # 3.71 insn per cycle - # 0.03 stalled cycles per insn (72.78%) - 0.060579522 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 8787) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.954658e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.968212e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.968212e+03 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.077211 sec +INFO: No Floating Point Exceptions have been reported + 235,223,590 cycles # 2.925 
GHz + 684,213,441 instructions # 2.91 insn per cycle + 0.080969906 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 9368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -139,34 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) 
========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.127532e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.137036e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.137036e+04 ) sec^-1 -MeanMatrixElemValue = ( 1.266821e-01 +- 1.264895e-01 ) GeV^-4 -TOTAL : 0.026209 sec -INFO: No Floating Point Exceptions have been reported - 96,195,626 cycles:u # 3.334 GHz (81.02%) - 13,254 stalled-cycles-frontend:u # 0.01% frontend cycles idle (73.24%) - 11,053,673 stalled-cycles-backend:u # 11.49% backend cycles idle (72.43%) - 226,834,304 instructions:u # 2.36 insn per cycle - # 0.05 stalled cycles per insn (72.42%) - 0.033077061 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7874) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.468005e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.473933e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.473933e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.037269 sec +INFO: No Floating Point Exceptions have been reported + 111,406,073 cycles # 2.752 GHz 
+ 248,660,524 instructions # 2.23 insn per cycle + 0.041010123 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -174,16 +183,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) 
========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.687371e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.694987e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.694987e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 +TOTAL : 0.032517 sec +INFO: No Floating Point Exceptions have been reported + 99,075,407 cycles # 2.779 GHz + 229,256,995 instructions # 2.31 insn per cycle + 0.036194322 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 1.125360e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.130339e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.130339e+04 ) sec^-1 +MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) 
GeV^-4 +TOTAL : 0.048501 sec +INFO: No Floating Point Exceptions have been reported + 88,927,475 cycles # 1.713 GHz + 128,580,821 instructions # 1.45 insn per cycle + 0.052459192 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6355) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 8.127459e-06 +Avg ME (F77/C++) = 8.1274563450143301E-006 +Relative difference = 3.266686019634872e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 4f73e04d01..bad45a7dc8 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ 
b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:05 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:22:43 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.205763e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.282357e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.339950e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.363554 sec -INFO: No Floating Point Exceptions have been reported - 824,654,219 cycles:u # 2.250 GHz (74.45%) - 2,378,121 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.16%) - 5,258,443 stalled-cycles-backend:u # 0.64% backend cycles idle (76.09%) - 1,405,112,573 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (75.85%) - 0.423315562 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.879555e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.325400e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.788674e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.519459 sec +INFO: No Floating Point Exceptions have been reported + 2,192,488,330 cycles # 2.904 GHz + 3,108,589,457 instructions # 1.42 insn per cycle + 0.811901500 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.170090e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.317717e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.317717e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.084559 sec -INFO: No Floating Point Exceptions have been reported - 3,490,848,997 cycles:u # 3.181 GHz (74.75%) - 7,660,767 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.48%) - 9,573,661 stalled-cycles-backend:u # 0.27% backend cycles idle (74.84%) - 9,510,925,502 instructions:u # 2.72 insn per cycle - # 0.00 stalled cycles per insn (75.22%) - 1.101615381 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.365007e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.070287e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.070287e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.231113 sec +INFO: No Floating Point Exceptions have been reported + 3,770,884,627 cycles # 3.051 GHz + 9,730,787,613 instructions # 2.58 insn per cycle + 1.236813254 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.200255e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.821135e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.821135e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.664149 sec -INFO: No Floating Point Exceptions have been reported - 2,027,787,140 cycles:u # 2.998 GHz (74.61%) - 8,263,760 stalled-cycles-frontend:u # 0.41% frontend cycles idle (75.11%) - 12,856,869 stalled-cycles-backend:u # 0.63% backend cycles idle (75.17%) - 5,831,439,407 instructions:u # 2.88 insn per cycle - # 0.00 stalled cycles per insn (75.17%) - 0.680595379 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1321) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.578999e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033336e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033336e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.776953 sec +INFO: No Floating Point Exceptions have been reported + 2,334,361,876 cycles # 2.984 GHz + 5,933,594,772 instructions # 2.54 insn per cycle + 0.782905833 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.423841e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.000143e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.000143e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.494024 sec -INFO: No Floating Point Exceptions have been reported - 1,417,663,203 cycles:u # 2.800 GHz (74.74%) - 8,507,099 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.86%) - 18,358,587 stalled-cycles-backend:u # 1.29% backend cycles idle (74.86%) - 3,268,344,350 instructions:u # 2.31 insn per cycle - # 0.01 stalled cycles per insn (74.73%) - 0.510616829 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.298604e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.378530e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.378530e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.568056 sec +INFO: No Floating Point Exceptions have been reported + 1,681,243,313 cycles # 2.932 GHz + 3,315,595,889 instructions # 1.97 insn per cycle + 0.574037989 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1499) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.355034e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.488075e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488075e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.558433 sec +INFO: No Floating Point Exceptions have been reported + 1,640,005,974 cycles # 2.909 GHz + 3,285,268,931 instructions # 2.00 insn per cycle + 0.564410411 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.255707e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.292044e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.292044e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.576788 sec +INFO: No Floating Point Exceptions have been reported + 1,373,892,799 cycles # 2.360 GHz + 2,425,202,745 instructions # 1.77 insn per cycle + 0.582721873 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index a70a2e7d3c..8744af06d4 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:11 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:22:55 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.339198e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.280910e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.338288e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.365386 sec -INFO: No Floating Point Exceptions have been reported - 841,771,538 cycles:u # 2.296 GHz (74.97%) - 2,469,523 stalled-cycles-frontend:u # 0.29% frontend cycles idle (73.99%) - 
5,625,984 stalled-cycles-backend:u # 0.67% backend cycles idle (74.23%) - 1,399,458,696 instructions:u # 1.66 insn per cycle - # 0.00 stalled cycles per insn (74.61%) - 0.427605675 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.982500e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.466123e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.977983e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.518522 sec +INFO: No Floating Point Exceptions have been reported + 2,233,076,106 cycles # 2.958 GHz + 3,164,749,953 instructions # 1.42 insn per cycle + 0.811884376 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.177270e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.322334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.322334e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.078822 sec -INFO: No Floating Point Exceptions have been reported - 3,468,850,927 cycles:u # 3.179 GHz (74.79%) - 8,097,787 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.08%) - 13,115,680 stalled-cycles-backend:u # 0.38% backend cycles idle (75.08%) - 9,432,282,791 instructions:u # 2.72 insn per cycle - # 0.00 stalled cycles per insn (75.08%) - 1.095862672 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 342) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.301306e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.064535e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064535e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.238968 sec +INFO: No Floating Point Exceptions have been reported + 3,730,421,090 cycles # 2.998 GHz + 9,611,838,153 instructions # 2.58 insn per cycle + 1.245009902 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.211326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.827359e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.827359e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.661082 sec -INFO: No Floating Point Exceptions have been reported - 1,999,223,079 cycles:u # 2.967 GHz (74.92%) - 7,891,752 stalled-cycles-frontend:u # 0.39% frontend cycles idle (75.08%) - 16,453,867 stalled-cycles-backend:u # 0.82% backend cycles idle (75.08%) - 5,834,514,752 instructions:u # 2.92 insn per cycle - # 0.00 stalled cycles per insn (75.07%) - 0.678122223 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1295) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.519835e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.952712e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.952712e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.801104 sec +INFO: No Floating Point Exceptions have been reported + 2,353,664,883 cycles # 2.919 GHz + 5,879,099,517 instructions # 2.50 insn per cycle + 0.807062172 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1340) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.413437e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.985270e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.985270e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.493893 sec -INFO: No Floating Point Exceptions have been reported - 1,414,426,875 cycles:u # 2.795 GHz (75.14%) - 8,214,642 stalled-cycles-frontend:u # 0.58% frontend cycles idle (74.74%) - 16,376,840 stalled-cycles-backend:u # 1.16% backend cycles idle (74.72%) - 3,277,774,628 instructions:u # 2.32 insn per cycle - # 0.00 stalled cycles per insn (74.85%) - 0.510737818 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1418) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.306572e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.401136e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.401136e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.564979 sec +INFO: No Floating Point Exceptions have been reported + 1,668,493,167 cycles # 2.925 GHz + 3,288,096,894 instructions # 1.97 insn per cycle + 0.571004997 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1436) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] 
(23) = ( 2.353584e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.490021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.490021e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.556005 sec +INFO: No Floating Point Exceptions have been reported + 1,637,480,739 cycles # 2.917 GHz + 3,262,503,753 instructions # 1.99 insn per cycle + 0.561947958 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.278727e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.296527e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.296527e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.572881 sec +INFO: No Floating Point Exceptions have been reported + 1,396,071,165 cycles # 2.414 GHz + 2,410,100,240 instructions # 1.73 insn per cycle + 0.578909062 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956172964268 +Relative difference = 2.59074336294025e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index 3f2ab68f19..319b533795 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:17 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:23:07 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.509378e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972058e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.041321e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 -TOTAL : 0.325556 sec -INFO: No Floating Point Exceptions have been reported - 821,644,211 cycles:u # 2.440 GHz (74.04%) - 2,420,357 stalled-cycles-frontend:u # 0.29% frontend cycles idle (74.41%) - 
12,629,346 stalled-cycles-backend:u # 1.54% backend cycles idle (74.81%) - 1,378,416,866 instructions:u # 1.68 insn per cycle - # 0.01 stalled cycles per insn (74.88%) - 0.379986913 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.021736e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.095898e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.502720e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.484746 sec +INFO: No Floating Point Exceptions have been reported + 2,097,572,068 cycles # 2.947 GHz + 2,993,117,399 instructions # 1.43 insn per cycle + 0.769929348 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771957969060168 -Relative difference = 5.394724574150425e-07 +Avg ME (F77/GPU) = 0.14771956735057756 +Relative difference = 4.559355911674916e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.379058e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.578973e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.578973e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.908172 sec -INFO: No Floating Point Exceptions have been reported - 2,981,211,693 cycles:u # 3.254 GHz (74.72%) - 6,588,015 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.69%) - 4,890,728 stalled-cycles-backend:u # 0.16% backend cycles idle (74.76%) - 9,481,627,819 instructions:u # 3.18 insn per cycle - # 0.00 stalled cycles per insn (74.77%) - 0.920815479 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 432) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.485111e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.089179e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.089179e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.192909 sec +INFO: No Floating Point Exceptions have been reported + 3,665,476,463 cycles # 3.060 GHz + 9,601,549,579 instructions # 2.62 insn per cycle + 1.198508580 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.468028e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.171493e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.171493e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 -TOTAL : 0.449474 sec -INFO: No Floating Point Exceptions have been reported - 1,387,037,153 cycles:u # 3.032 GHz (74.51%) - 6,234,875 stalled-cycles-frontend:u # 0.45% frontend cycles idle (75.37%) - 19,626,012 stalled-cycles-backend:u # 1.41% backend cycles idle (75.52%) - 3,856,671,799 instructions:u # 2.78 insn per cycle - # 0.01 stalled cycles per insn (75.52%) - 0.461712887 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1513) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.258115e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.376765e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.376765e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.551301 sec +INFO: No Floating Point Exceptions have been reported + 1,637,946,426 cycles # 2.944 GHz + 3,967,582,411 instructions # 2.42 insn per cycle + 0.556978816 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955448668450 -Relative difference = 3.081061382869002e-07 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.654464e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.000589e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.000589e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.373131 sec -INFO: No Floating Point Exceptions have been reported - 1,093,114,711 cycles:u # 2.868 GHz (74.83%) - 5,539,006 stalled-cycles-frontend:u # 0.51% frontend cycles idle (74.83%) - 11,753,677 stalled-cycles-backend:u # 1.08% backend cycles idle (74.82%) - 2,419,335,654 instructions:u # 2.21 insn per cycle - # 0.00 stalled cycles per insn (74.98%) - 0.385911766 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.152306e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.639356e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.639356e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.420259 sec +INFO: No Floating Point Exceptions have been reported + 1,264,212,435 cycles # 2.972 GHz + 2,497,364,762 instructions # 1.98 insn per cycle + 0.425990331 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1924) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955128526315 -Relative difference = 3.2977842382139064e-07 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.176305e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.859286e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.859286e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.418213 sec +INFO: No Floating Point Exceptions have been reported + 1,244,133,116 cycles # 2.939 GHz + 2,473,380,671 instructions # 1.99 insn per cycle + 0.423994842 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1870) (512y: 1) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.060336e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.249952e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.249952e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.431162 sec +INFO: No Floating Point Exceptions have been reported + 1,082,620,148 cycles # 2.481 GHz + 2,073,283,815 instructions # 1.92 insn per cycle + 0.436955508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 9145b856d6..30254feeab 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:22 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:23:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.522939e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.946792e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.014454e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.485983e-01 +- 3.276854e-05 ) GeV^0 -TOTAL : 0.344754 sec -INFO: No Floating Point Exceptions have been reported - 799,483,065 cycles:u # 2.366 GHz (74.85%) - 2,309,346 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.34%) - 6,615,642 
stalled-cycles-backend:u # 0.83% backend cycles idle (76.39%) - 1,387,436,934 instructions:u # 1.74 insn per cycle - # 0.00 stalled cycles per insn (73.91%) - 0.403460468 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 1.019401e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.048318e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.455629e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 +TOTAL : 0.481010 sec +INFO: No Floating Point Exceptions have been reported + 2,088,372,875 cycles # 2.945 GHz + 2,964,890,992 instructions # 1.42 insn per cycle + 0.766303026 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 -Avg ME (F77/GPU) = 0.14771957969060168 -Relative difference = 5.394724574150425e-07 +Avg ME (F77/GPU) = 0.14771956525510177 +Relative difference = 4.4175008557828484e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.383132e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.585694e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.585694e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283184e-05 ) GeV^0 -TOTAL : 0.906227 sec -INFO: No Floating Point Exceptions have been reported - 2,961,341,832 cycles:u # 3.239 GHz (74.89%) - 6,546,623 stalled-cycles-frontend:u # 0.22% frontend cycles idle (74.65%) - 8,080,120 stalled-cycles-backend:u # 0.27% backend cycles idle (74.70%) - 9,414,623,269 instructions:u # 3.18 insn per cycle - # 0.00 stalled cycles per insn (74.70%) - 0.918418496 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 337) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.478146e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.094736e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.094736e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 1.193560 sec +INFO: No Floating Point Exceptions have been reported + 3,623,971,187 cycles # 3.024 GHz + 9,471,432,296 instructions # 2.61 insn per cycle + 1.199132805 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] 
[hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.501547e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.197208e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.197208e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283183e-05 ) GeV^0 -TOTAL : 0.450826 sec -INFO: No Floating Point Exceptions have been reported - 1,381,505,679 cycles:u # 3.010 GHz (74.30%) - 6,213,946 stalled-cycles-frontend:u # 0.45% frontend cycles idle (75.15%) - 9,694,256 stalled-cycles-backend:u # 0.70% backend cycles idle (75.60%) - 3,820,776,901 instructions:u # 2.77 insn per cycle - # 0.00 stalled cycles per insn (75.60%) - 0.463548588 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1479) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.293885e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.464836e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.464836e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 +TOTAL : 0.543877 sec +INFO: No Floating Point Exceptions have been reported + 1,640,922,140 cycles # 2.988 GHz + 3,933,388,950 instructions # 2.40 insn per cycle + 0.549660540 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955448668450 -Relative difference = 3.081061382869002e-07 +Avg ME (F77/C++) = 0.14771955861942843 +Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] 
+Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.671406e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.045498e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.045498e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283177e-05 ) GeV^0 -TOTAL : 0.371300 sec -INFO: No Floating Point Exceptions have been reported - 1,092,610,661 cycles:u # 2.881 GHz (74.81%) - 5,305,410 stalled-cycles-frontend:u # 0.49% frontend cycles idle (74.70%) - 33,831,557 stalled-cycles-backend:u # 3.10% backend cycles idle (74.69%) - 2,378,087,655 instructions:u # 2.18 insn per cycle - # 0.01 stalled cycles per insn (74.73%) - 0.383374559 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1802) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.096652e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.526616e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.526616e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.424433 sec +INFO: No Floating Point Exceptions have been reported + 1,265,916,102 cycles # 2.948 GHz + 2,482,033,677 instructions # 1.96 insn per cycle + 0.430083916 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1817) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771955128526315 -Relative difference = 3.2977842382139064e-07 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.099768e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.536640e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.536640e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 +TOTAL : 0.425471 sec +INFO: No Floating Point Exceptions have been reported + 1,239,687,962 cycles # 2.879 GHz + 2,457,003,272 instructions # 1.98 insn per cycle + 0.431204562 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1773) (512y: 1) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955698961392 +Relative difference = 2.9116235141448046e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.076752e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.347849e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.347849e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 +TOTAL : 0.427038 sec +INFO: No Floating Point Exceptions have been reported + 1,082,096,190 cycles # 2.503 GHz + 2,057,508,420 instructions # 1.90 insn per cycle + 0.432876705 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771955262403935 +Relative difference = 3.207154680524219e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 620a232d6e..c992dd1560 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:28 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:23:30 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.287948e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.300323e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.359514e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.350690 sec -INFO: No Floating Point Exceptions have been reported - 790,684,072 cycles:u # 2.155 GHz (76.09%) - 2,255,006 stalled-cycles-frontend:u # 0.29% frontend cycles idle (76.21%) - 8,212,728 
stalled-cycles-backend:u # 1.04% backend cycles idle (74.05%) - 1,515,061,170 instructions:u # 1.92 insn per cycle - # 0.01 stalled cycles per insn (73.59%) - 0.411230761 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.870947e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.292610e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.748112e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.517185 sec +INFO: No Floating Point Exceptions have been reported + 2,235,637,342 cycles # 2.968 GHz + 3,165,178,455 instructions # 1.42 insn per cycle + 0.810025271 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.073332e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.193079e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.193079e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.165645 sec -INFO: No Floating Point Exceptions have been reported - 3,760,508,128 cycles:u # 3.181 GHz (74.97%) - 9,799,535 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.04%) - 82,319,711 stalled-cycles-backend:u # 2.19% backend cycles idle (75.04%) - 9,617,990,540 instructions:u # 2.56 insn per cycle - # 0.01 stalled cycles per insn (74.98%) - 1.186960560 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 332) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.276369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.059318e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.059318e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.243225 sec +INFO: No Floating Point Exceptions have been reported + 3,811,509,127 cycles # 3.053 GHz + 9,755,893,754 instructions # 2.56 insn per cycle + 1.249011242 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.257401e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.939163e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.939163e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.651275 sec -INFO: No Floating Point Exceptions have been reported - 1,960,210,789 cycles:u # 2.953 GHz (74.19%) - 7,743,342 stalled-cycles-frontend:u # 0.40% frontend cycles idle (74.19%) - 9,288,328 stalled-cycles-backend:u # 0.47% backend cycles idle (74.70%) - 5,855,328,189 instructions:u # 2.99 insn per cycle - # 0.00 stalled cycles per insn (75.19%) - 0.668603584 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1383) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.575213e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.033630e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.033630e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.777751 sec +INFO: No Floating Point Exceptions have been reported + 2,324,158,098 cycles # 2.968 GHz + 5,921,190,869 instructions # 2.55 insn per cycle + 0.783772418 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1412) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.497892e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.159332e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.159332e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.487677 sec -INFO: No Floating Point Exceptions have been reported - 1,407,046,199 cycles:u # 2.814 GHz (74.51%) - 8,503,247 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.55%) - 15,903,763 stalled-cycles-backend:u # 1.13% backend cycles idle (74.44%) - 3,151,478,847 instructions:u # 2.24 insn per cycle - # 0.01 stalled cycles per insn (75.20%) - 0.504440981 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1546) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.318378e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.429052e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.429052e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.565758 sec +INFO: No Floating Point Exceptions have been reported + 1,652,981,708 cycles # 2.895 GHz + 3,254,347,551 instructions # 1.97 insn per cycle + 0.571727030 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1567) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956675526976 -Relative difference = 2.2505293980258705e-07 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.435162e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.624330e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.624330e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.542890 sec +INFO: No Floating Point Exceptions have been reported + 1,608,327,569 cycles # 2.934 GHz + 3,210,329,014 instructions # 2.00 insn per cycle + 0.548955457 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.277841e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.302624e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.302624e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.573796 sec +INFO: No Floating Point Exceptions have been reported + 1,366,629,222 cycles # 2.360 GHz + 2,377,238,088 instructions # 1.74 insn per cycle + 0.579856899 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 95e26b8533..1ec6ca11ae 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. 
-make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-10-04_11:51:33 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:23:42 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.319682e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.271681e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.328454e+08 ) sec^-1 -MeanMatrixElemValue = ( 1.486776e-01 +- 3.291446e-05 ) GeV^0 -TOTAL : 0.348858 sec -INFO: No Floating Point Exceptions have been reported - 836,700,042 cycles:u # 2.292 GHz (75.28%) - 2,521,349 stalled-cycles-frontend:u # 0.30% frontend cycles idle (75.13%) - 6,955,479 
stalled-cycles-backend:u # 0.83% backend cycles idle (74.89%) - 1,467,285,506 instructions:u # 1.75 insn per cycle - # 0.00 stalled cycles per insn (73.54%) - 0.411037215 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 5.955347e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449634e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971675e+08 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.519560 sec +INFO: No Floating Point Exceptions have been reported + 2,229,656,114 cycles # 2.956 GHz + 3,136,915,829 instructions # 1.41 insn per cycle + 0.813453217 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = 
CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.168828e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.312276e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.312276e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 1.084440 sec -INFO: No Floating Point Exceptions have been reported - 3,504,205,366 cycles:u # 3.194 GHz (74.49%) - 7,959,362 stalled-cycles-frontend:u # 0.23% frontend cycles idle (74.85%) - 13,470,086 stalled-cycles-backend:u # 0.38% backend cycles idle (75.22%) - 9,469,384,445 instructions:u # 2.70 insn per cycle - # 0.00 stalled cycles per insn (75.21%) - 1.101421195 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 343) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 9.306555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.063008e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063008e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 1.238276 sec +INFO: No Floating Point Exceptions have been reported + 3,773,723,631 cycles # 3.035 GHz + 9,644,120,028 instructions # 2.56 insn per cycle + 1.244186863 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.280836e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.941406e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.941406e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.646416 sec -INFO: No Floating Point Exceptions have been reported - 1,944,085,411 cycles:u # 2.952 GHz (74.64%) - 7,522,720 stalled-cycles-frontend:u # 0.39% frontend cycles idle (74.60%) - 16,790,089 stalled-cycles-backend:u # 0.86% backend cycles idle (74.60%) - 5,886,824,204 instructions:u # 3.03 insn per cycle - # 0.00 stalled cycles per insn (74.56%) - 0.663422839 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 1353) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.549168e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.991454e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991454e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.789049 sec +INFO: No Floating Point Exceptions have been reported + 2,313,346,456 cycles # 2.912 GHz + 5,848,887,121 instructions # 2.53 insn per cycle + 0.794970078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 1371) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.510803e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.168157e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.168157e+06 ) sec^-1 -MeanMatrixElemValue = ( 1.486031e-01 +- 3.283178e-05 ) GeV^0 -TOTAL : 0.485381 sec -INFO: No Floating Point Exceptions have been reported - 1,393,931,403 cycles:u # 2.799 GHz (74.55%) - 8,424,009 stalled-cycles-frontend:u # 0.60% frontend cycles idle (74.44%) - 25,161,499 stalled-cycles-backend:u # 1.81% backend cycles idle (74.32%) - 3,171,112,938 instructions:u # 2.27 insn per cycle - # 0.01 stalled cycles per insn (74.68%) - 0.502324189 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1487) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.347614e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.473937e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473937e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.557834 sec +INFO: No Floating Point Exceptions have been reported + 1,655,348,908 cycles # 2.940 GHz + 3,217,952,635 instructions # 1.94 insn per cycle + 0.563871078 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 -Avg ME (F77/C++) = 0.14771956675526976 -Relative difference = 2.2505293980258705e-07 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal 
loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.424845e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.621915e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.621915e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.543698 sec +INFO: No Floating Point Exceptions have been reported + 1,602,341,227 cycles # 2.919 GHz + 3,182,199,907 instructions # 1.99 insn per cycle + 0.549609066 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe is not supported 
(no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 2.297489e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.339579e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339579e+06 ) sec^-1 +MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 +TOTAL : 0.569767 sec +INFO: No Floating Point Exceptions have been reported + 1,382,180,389 cycles # 2.403 GHz + 2,361,725,571 instructions # 1.71 insn per cycle + 0.575784231 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1056) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 1.477196e-01 +Avg ME (F77/C++) = 0.14771956674392650 +Relative difference = 2.2512972893324335e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index b8e944a251..370e514c12 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:49:56 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:20:23 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.825932e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.303839e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.322040e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.410068 sec -INFO: No Floating Point Exceptions have been reported - 997,958,849 cycles:u # 2.373 GHz (75.23%) - 2,279,294 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.29%) - 6,782,942 stalled-cycles-backend:u # 0.68% backend cycles idle (75.46%) - 1,625,055,048 instructions:u # 1.63 insn per cycle - # 0.00 stalled cycles per insn (74.44%) - 0.467403205 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.230162e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.323594e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002154e+08 ) sec^-1 
+MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.536130 sec +INFO: No Floating Point Exceptions have been reported + 2,256,394,755 cycles # 2.938 GHz + 3,245,914,401 instructions # 1.44 insn per cycle + 0.828827482 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195553 -Relative difference = 6.616631755314852e-08 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.544927e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.606801e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.606801e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.307024 sec -INFO: No Floating Point Exceptions have been reported - 14,759,851,104 cycles:u # 3.418 GHz (75.00%) - 10,213,686 stalled-cycles-frontend:u # 0.07% frontend cycles idle (74.99%) - 2,963,621,521 stalled-cycles-backend:u # 20.08% backend cycles idle (74.99%) - 45,578,208,957 instructions:u # 3.09 insn per cycle - # 0.07 stalled cycles per insn (75.00%) - 4.323134979 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 663) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.895732e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.944199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.944199e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.637832 sec +INFO: No Floating Point Exceptions have been reported + 17,273,065,240 cycles # 3.061 GHz + 45,923,472,217 instructions # 2.66 insn per cycle + 5.643410439 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158358666194411 -Relative difference = 6.616637417031725e-08 +Avg ME (F77/C++) = 2.0158358666194407 +Relative difference = 6.616637439061751e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] 
+Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.343987e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.531990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.531990e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.593460 sec -INFO: No Floating Point Exceptions have been reported - 8,804,265,677 cycles:u # 3.380 GHz (74.95%) - 8,608,560 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.10%) - 2,711,216,699 stalled-cycles-backend:u # 30.79% backend cycles idle (75.13%) - 27,713,688,883 instructions:u # 3.15 insn per cycle - # 0.10 stalled cycles per insn (75.13%) - 2.609709313 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2458) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.297798e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.461035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.461035e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.287380 sec +INFO: No Floating Point Exceptions have been reported + 10,057,055,600 cycles # 3.055 GHz + 27,804,384,494 instructions # 2.76 insn per cycle + 3.293195334 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2537) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.337973e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.867218e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.867218e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.608838 sec -INFO: No Floating Point Exceptions have been reported - 5,332,671,248 cycles:u # 3.290 GHz (74.86%) - 8,580,627 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.87%) - 548,978,982 stalled-cycles-backend:u # 10.29% backend cycles idle (74.86%) - 12,401,583,918 instructions:u # 2.33 insn per cycle - # 0.04 stalled cycles per insn (74.83%) - 1.625027653 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2492) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.984936e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.354618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.354618e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.209636 sec +INFO: No Floating Point Exceptions have been reported + 6,102,986,954 cycles # 2.763 GHz + 12,589,726,132 instructions # 2.06 insn per cycle + 2.215628249 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.327606e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.775533e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.775533e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.077880 sec +INFO: No Floating Point Exceptions have been reported + 5,579,947,178 cycles # 2.679 GHz + 12,003,081,651 instructions # 2.15 insn per cycle + 2.084004672 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2365) (512y: 144) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.667640e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.860946e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.860946e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.965237 sec +INFO: No Floating Point Exceptions have been reported + 5,764,359,655 cycles # 1.943 GHz + 8,342,529,257 instructions # 1.45 insn per cycle + 2.971031508 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 8097702dbb..4a0767e5de 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:50:09 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:20:47 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:DBL+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.860299e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.359622e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.378594e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.400310 sec -INFO: No Floating Point Exceptions have been reported - 977,387,830 cycles:u # 2.341 GHz (74.17%) - 2,517,211 stalled-cycles-frontend:u # 0.26% frontend cycles idle (75.55%) - 6,318,079 stalled-cycles-backend:u # 0.65% backend cycles idle (75.23%) - 1,636,223,246 instructions:u # 1.67 insn per cycle - # 0.00 stalled cycles per insn (75.17%) - 0.463223853 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.355605e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.277087e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.956218e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.530876 sec +INFO: No Floating Point Exceptions have been reported + 2,249,324,155 cycles # 2.931 GHz + 3,226,562,604 instructions # 1.43 insn per cycle + 0.824282948 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 -Avg ME (F77/GPU) = 2.0158358666195553 -Relative difference = 6.616631755314852e-08 +Avg ME (F77/GPU) = 2.0158358666195562 +Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] 
[hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.672609e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.739968e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.739968e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.102592 sec -INFO: No Floating Point Exceptions have been reported - 14,079,409,845 cycles:u # 3.422 GHz (74.98%) - 8,049,289 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 2,310,838,592 stalled-cycles-backend:u # 16.41% backend cycles idle (74.93%) - 44,472,298,847 instructions:u # 3.16 insn per cycle - # 0.05 stalled cycles per insn (74.92%) - 4.118929817 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 592) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.940475e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.991632e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.991632e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.508327 sec +INFO: No Floating Point Exceptions have been reported + 16,765,096,335 cycles # 3.041 GHz + 44,907,213,075 instructions # 2.68 insn per cycle + 5.514387413 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, 
FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE 
(NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.610444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.829618e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.829618e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.453155 sec -INFO: No Floating Point Exceptions have been reported - 8,297,206,118 cycles:u # 3.367 GHz (75.01%) - 9,142,863 stalled-cycles-frontend:u # 0.11% frontend cycles idle (75.01%) - 1,462,081,092 stalled-cycles-backend:u # 17.62% backend cycles idle (75.01%) - 26,753,959,008 instructions:u # 3.22 insn per cycle - # 0.05 stalled cycles per insn (75.03%) - 2.468792614 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2278) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.469638e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.652475e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.652475e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.131046 sec +INFO: No Floating Point Exceptions have been reported + 9,519,736,258 cycles # 3.036 GHz + 26,678,539,115 instructions # 2.80 insn per cycle + 3.137009684 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2326) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.582782e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.005097e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.005097e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.770963 sec -INFO: No Floating Point Exceptions have been reported - 5,924,884,751 cycles:u # 3.324 GHz (74.79%) - 10,170,880 stalled-cycles-frontend:u # 0.17% frontend cycles idle (74.89%) - 1,235,930,126 stalled-cycles-backend:u # 20.86% backend cycles idle (74.90%) - 14,218,104,856 instructions:u # 2.40 insn per cycle - # 0.09 stalled cycles per insn (74.88%) - 1.786720730 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2700) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.671787e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.002601e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.002601e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.352280 sec +INFO: No Floating Point Exceptions have been reported + 6,629,963,277 cycles # 2.812 GHz + 14,109,636,377 instructions # 2.13 insn per cycle + 2.358209355 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2705) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.754606e+05 ) sec^-1 
+EvtsPerSec[MatrixElems] (3) = ( 5.104698e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.104698e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.312900 sec +INFO: No Floating Point Exceptions have been reported + 6,361,189,972 cycles # 2.744 GHz + 13,713,824,218 instructions # 2.16 insn per cycle + 2.319011188 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 298) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = DOUBLE (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.432030e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.604686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.604686e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.161395 sec +INFO: No Floating Point Exceptions have been reported + 5,974,388,712 cycles # 1.887 GHz + 10,105,486,265 instructions # 1.69 insn per cycle + 3.167180711 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158358666194953 +Relative difference = 6.616634729368461e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index de9a53846a..171c4f07f1 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:50:21 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:21:12 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.029624e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.168974e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.192805e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 -TOTAL : 0.334128 sec -INFO: No Floating Point Exceptions have been reported - 847,605,952 cycles:u # 2.446 GHz (74.31%) - 2,404,984 stalled-cycles-frontend:u # 0.28% frontend cycles idle (75.19%) - 6,809,593 stalled-cycles-backend:u # 0.80% backend cycles idle (76.16%) - 1,440,149,727 instructions:u # 1.70 insn per cycle - # 0.00 stalled cycles per insn (74.02%) - 0.392018881 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.343508e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.749333e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880185e+08 ) sec^-1 
+MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.485800 sec +INFO: No Floating Point Exceptions have been reported + 2,094,905,997 cycles # 2.937 GHz + 3,016,360,566 instructions # 1.44 insn per cycle + 0.770368991 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.015844e+00 -Avg ME (F77/GPU) = 2.0158466693246737 -Relative difference = 1.3241722443517625e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.916433e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.996682e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.996682e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 3.739424 sec -INFO: No Floating Point Exceptions have been reported - 12,919,555,123 cycles:u # 3.448 GHz (74.96%) - 7,161,924 stalled-cycles-frontend:u # 0.06% frontend cycles idle (75.02%) - 2,653,034,768 stalled-cycles-backend:u # 20.54% backend cycles idle (75.02%) - 45,463,370,048 instructions:u # 3.52 insn per cycle - # 0.06 stalled cycles per insn (75.02%) - 3.751360428 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 667) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 2.003751e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.061477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.061477e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.317047 sec +INFO: No Floating Point Exceptions have been reported + 16,226,729,405 cycles # 3.049 GHz + 45,319,748,869 instructions # 2.79 insn per cycle + 5.322657984 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491450129077 -Relative difference = 7.193639399772436e-08 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.114956e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.484341e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.484341e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 1.859533 sec -INFO: No Floating Point Exceptions have been reported - 6,325,374,109 cycles:u # 3.388 GHz (75.03%) - 6,631,466 stalled-cycles-frontend:u # 0.10% frontend cycles idle (75.15%) - 2,746,926,513 stalled-cycles-backend:u # 43.43% backend cycles idle (75.15%) - 17,097,211,499 instructions:u # 2.70 insn per cycle - # 0.16 stalled cycles per insn (75.15%) - 1.871211674 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2902) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.661368e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.006222e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.006222e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.333881 sec +INFO: No Floating Point Exceptions have been reported + 7,065,193,815 cycles # 3.021 GHz + 17,792,282,713 instructions # 2.52 insn per cycle + 2.339489027 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 3147) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158492142800242 -Relative difference = 1.0629765641719438e-07 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.194705e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.337360e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.337360e+06 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 -TOTAL : 1.021215 sec -INFO: No Floating Point Exceptions have been reported - 3,362,486,824 cycles:u # 3.269 GHz (75.11%) - 6,869,200 stalled-cycles-frontend:u # 0.20% frontend cycles idle (75.11%) - 830,943,627 stalled-cycles-backend:u # 24.71% backend cycles idle (75.11%) - 8,093,573,532 instructions:u # 2.41 insn per cycle - # 0.10 stalled cycles per insn (75.11%) - 1.033064527 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3258) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 8.680930e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.902131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.902131e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.293085 sec +INFO: No Floating Point Exceptions have been reported + 3,745,244,491 cycles # 2.886 GHz + 8,263,077,102 instructions # 2.21 insn per cycle + 1.298740126 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3371) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158479403471574 -Relative difference = 2.9591934841076347e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 9.127600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.045053e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.045053e+06 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.236021 sec +INFO: No Floating Point Exceptions have been reported + 3,554,738,616 cycles # 2.865 GHz + 7,914,272,775 instructions # 2.23 insn per cycle + 1.241584729 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3214) (512y: 20) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 6.816839e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.519320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.519320e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.622579 sec +INFO: No Floating Point Exceptions have been reported + 3,259,303,388 cycles # 2.003 GHz + 6,101,587,749 instructions # 1.87 insn per cycle + 1.628190659 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2258) (512y: 22) (512z: 2156) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index cbd2b02691..5827327dd2 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:50:31 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:21:32 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:FLT+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.141035e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.214548e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.240427e+08 ) sec^-1 -MeanMatrixElemValue = ( 2.064391e+00 +- 3.343192e-03 ) GeV^0 -TOTAL : 0.338852 sec -INFO: No Floating Point Exceptions have been reported - 846,172,164 cycles:u # 2.420 GHz (75.55%) - 2,384,656 stalled-cycles-frontend:u # 0.28% frontend cycles idle (74.94%) - 8,880,088 stalled-cycles-backend:u # 1.05% backend cycles idle (75.39%) - 1,440,550,238 instructions:u # 1.70 insn per cycle - # 0.01 stalled cycles per insn (77.34%) - 0.396188916 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 8.278999e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.762585e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.886988e+08 ) sec^-1 
+MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 +TOTAL : 0.489792 sec +INFO: No Floating Point Exceptions have been reported + 2,055,512,500 cycles # 2.867 GHz + 2,939,151,591 instructions # 1.43 insn per cycle + 0.774255420 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/fcheck_hip.exe 2 64 2 -Avg ME (C++/GPU) = 2.015844e+00 -Avg ME (F77/GPU) = 2.0158466693246737 -Relative difference = 1.3241722443517625e-06 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +Avg ME (C++/GPU) = 2.015841e+00 +Avg ME (F77/GPU) = 2.0158787037944421 +Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.042121e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.129542e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.129542e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 3.591543 sec -INFO: No Floating Point Exceptions have been reported - 12,399,003,358 cycles:u # 3.445 GHz (74.89%) - 7,265,600 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) - 1,294,290,338 stalled-cycles-backend:u # 10.44% backend cycles idle (74.94%) - 44,252,750,290 instructions:u # 3.57 insn per cycle - # 0.03 stalled cycles per insn (75.03%) - 3.603349091 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 571) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.955650e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.011909e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.011909e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 +TOTAL : 5.448356 sec +INFO: No Floating Point Exceptions have been reported + 15,943,191,357 cycles # 2.924 GHz + 44,424,518,586 instructions # 2.79 insn per cycle + 5.454103934 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158491450129077 -Relative difference = 7.193639399772436e-08 +Avg ME (F77/C++) = 2.0158491701586172 +Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = 
SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.535434e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.112084e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.112084e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065823e+00 +- 3.352517e-03 ) GeV^0 -TOTAL : 1.535162 sec -INFO: No Floating Point Exceptions have been reported - 5,185,887,199 cycles:u # 3.362 GHz (74.95%) - 6,649,344 stalled-cycles-frontend:u # 0.13% frontend cycles idle (75.11%) - 1,503,459,114 stalled-cycles-backend:u # 28.99% backend cycles idle (75.11%) - 16,935,312,911 instructions:u # 3.27 insn per cycle - # 0.09 stalled cycles per insn (75.11%) - 1.547341857 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2752) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.276402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.747216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.747216e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 +TOTAL : 2.074473 sec +INFO: No Floating Point Exceptions have been reported + 6,074,931,142 cycles # 2.922 GHz + 17,078,265,912 instructions # 2.81 insn per cycle + 2.080193584 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2862) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 -Avg ME (F77/C++) = 2.0158492142800242 -Relative difference = 1.0629765641719438e-07 +Avg ME (F77/C++) = 2.0158486895961687 +Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = 
CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.902619e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.674327e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.674327e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065802e+00 +- 3.352030e-03 ) GeV^0 -TOTAL : 1.321463 sec -INFO: No Floating Point Exceptions have been reported - 4,431,538,272 cycles:u # 3.335 GHz (74.77%) - 7,094,416 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.77%) - 1,688,909,827 stalled-cycles-backend:u # 38.11% backend cycles idle (74.77%) - 10,255,351,383 instructions:u # 2.31 insn per cycle - # 0.16 stalled cycles per insn (74.94%) - 1.333077945 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3884) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 6.007855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.581033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.581033e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.830417 sec +INFO: No Floating Point Exceptions have been reported + 5,038,064,439 cycles # 2.745 GHz + 10,225,598,218 instructions # 2.03 insn per cycle + 1.836161273 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 -Avg ME (C++/C++) = 2.015848e+00 -Avg ME (F77/C++) = 2.0158479403471574 -Relative difference = 2.9591934841076347e-08 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops 
fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.986593e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.564461e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.564461e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 1.838696 sec +INFO: No Floating Point Exceptions have been reported + 4,986,170,011 cycles # 2.706 GHz + 9,996,697,446 instructions # 2.00 insn per cycle + 1.844536408 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015847e+00 +Avg ME (F77/C++) = 2.0158474864438176 +Relative difference = 2.4130988992271984e-07 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = FLOAT (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.589226e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.912431e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.912431e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 +TOTAL : 2.372568 sec +INFO: No Floating Point Exceptions have been reported + 4,377,668,270 cycles # 1.841 GHz + 8,445,524,154 instructions # 1.93 insn per cycle + 2.378514848 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015848e+00 +Avg ME (F77/C++) = 2.0158476348733529 +Relative difference = 1.8112806478434436e-07 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index ad357326a9..4c61e46c6d 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. 
-make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:50:41 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:21:54 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.844695e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.314566e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.332811e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.399769 sec -INFO: No Floating Point Exceptions have been reported - 1,000,344,858 cycles:u # 2.398 GHz (75.57%) - 2,483,436 stalled-cycles-frontend:u # 0.25% frontend cycles idle (74.29%) - 5,811,310 stalled-cycles-backend:u # 0.58% backend cycles idle (74.17%) - 1,551,135,214 instructions:u # 1.55 insn per cycle - # 0.00 stalled cycles per insn (76.46%) - 0.463433890 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.251838e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.183380e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.939643e+07 ) sec^-1 
+MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.535615 sec +INFO: No Floating Point Exceptions have been reported + 2,198,949,202 cycles # 2.843 GHz + 3,150,067,963 instructions # 1.43 insn per cycle + 0.831211671 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.571404e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.633916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.633916e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.258083 sec -INFO: No Floating Point Exceptions have been reported - 14,636,986,021 cycles:u # 3.428 GHz (74.93%) - 9,201,381 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.90%) - 2,834,488,725 stalled-cycles-backend:u # 19.37% backend cycles idle (74.93%) - 45,648,548,771 instructions:u # 3.12 insn per cycle - # 0.06 stalled cycles per insn (75.03%) - 4.274649438 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 673) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.793078e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838862e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838862e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.957870 sec +INFO: No Floating Point Exceptions have been reported + 17,383,086,317 cycles # 2.915 GHz + 46,074,988,832 instructions # 2.65 insn per cycle + 5.963882040 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.331049e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.526012e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.526012e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.601226 sec -INFO: No Floating Point Exceptions have been reported - 8,833,488,747 cycles:u # 3.381 GHz (74.92%) - 9,265,639 stalled-cycles-frontend:u # 0.10% frontend cycles idle (74.92%) - 2,771,133,878 stalled-cycles-backend:u # 31.37% backend cycles idle (74.90%) - 27,586,457,635 instructions:u # 3.12 insn per cycle - # 0.10 stalled cycles per insn (75.03%) - 2.617092705 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2518) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.194287e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.355552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355552e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.392799 sec +INFO: No Floating Point Exceptions have been reported + 9,911,878,237 cycles # 2.918 GHz + 27,589,860,886 instructions # 2.78 insn per cycle + 3.398866655 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2581) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=0] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.500359e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.058510e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.058510e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.577290 sec -INFO: No Floating Point Exceptions have been reported - 5,237,176,956 cycles:u # 3.297 GHz (74.84%) - 9,167,020 stalled-cycles-frontend:u # 0.18% frontend cycles idle (74.86%) - 1,282,972,972 stalled-cycles-backend:u # 24.50% backend cycles idle (74.86%) - 12,276,243,394 instructions:u # 2.34 insn per cycle - # 0.10 stalled cycles per insn (74.83%) - 1.592961731 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2671) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 5.099557e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.502113e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.502113e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.164835 sec +INFO: No Floating Point Exceptions have been reported + 6,014,043,358 cycles # 2.771 GHz + 12,488,668,893 instructions # 2.08 insn per cycle + 2.170853663 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359151896224 -Relative difference = 4.20720623263505e-08 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 5.772169e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.266403e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.266403e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 1.920250 sec +INFO: No Floating Point Exceptions have been reported + 5,548,106,991 cycles # 2.882 GHz + 11,923,814,669 instructions # 2.15 insn per cycle + 1.926159830 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2521) (512y: 146) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.789351e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.994932e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.994932e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.873374 sec +INFO: No Floating Point Exceptions have been reported + 5,656,356,995 cycles # 1.965 GHz + 8,113,165,976 instructions # 1.43 insn per cycle + 2.879222217 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1865) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 2c5c1083f9..9c262ab65b 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,49 +1,68 @@ -Building in /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx -BACKEND=cppavx2 (was cppauto) +Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' HELINL='0' HRDCOD='0' -HASCURAND=hasNoCurand +HASCURAND=hasCurand HASHIPRAND=hasNoHiprand -Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=avx2_d_inl0_hrd0_hasNoCurand_hasNoHiprand (USEBUILDDIR == 1) +Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_hasNoHiprand (USEBUILDDIR == 1) make: Nothing to be done for 'gtestlibs'. -make: Nothing to be done for 'all'. -make: Nothing to be done for 'all'. 
+make USEBUILDDIR=1 BACKEND=cuda +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppnone +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make: Nothing to be done for 'all'. +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-10-04_11:50:53 +make USEBUILDDIR=1 BACKEND=cpp512z +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Nothing to be done for 'all'. 
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -On uan03 [CPU: AMD EPYC 7A53 64-Core Processor] [GPU: AMD INSTINCT MI200]: +DATE: 2024-10-02_23:22:19 + +On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_HIP [hipcc 6.0.32831 (clang 17.0.0)] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = HIP:MIX+CXS:HIRDEV+RMBDEV+MESDEV/none+NAVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.854334e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.340714e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.359394e+07 ) sec^-1 -MeanMatrixElemValue = ( 2.073340e+00 +- 3.357983e-03 ) GeV^0 -TOTAL : 0.395885 sec -INFO: No Floating Point Exceptions have been reported - 1,009,668,218 cycles:u # 2.443 GHz (74.93%) - 2,342,458 stalled-cycles-frontend:u # 0.23% frontend cycles idle (75.08%) - 7,222,790 stalled-cycles-backend:u # 0.72% backend cycles idle (74.93%) - 1,606,228,617 instructions:u # 1.59 insn per cycle - # 0.00 stalled cycles per insn (73.70%) - 0.455473650 seconds time elapsed +EvtsPerSec[Rmb+ME] (23) = ( 4.276232e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.390219e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005905e+08 ) sec^-1 
+MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 0.527346 sec +INFO: No Floating Point Exceptions have been reported + 2,272,920,837 cycles # 2.964 GHz + 3,201,602,686 instructions # 1.41 insn per cycle + 0.824609816 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 +==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 +==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/runTest_hip.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -51,34 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/fcheck_hip.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +========================================================================= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/none+NAVBRK 
+Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.627354e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.693079e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.693079e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 4.170234 sec -INFO: No Floating Point Exceptions have been reported - 14,290,330,106 cycles:u # 3.417 GHz (74.98%) - 8,190,052 stalled-cycles-frontend:u # 0.06% frontend cycles idle (74.94%) - 709,468,716 stalled-cycles-backend:u # 4.96% backend cycles idle (74.96%) - 44,665,806,699 instructions:u # 3.13 insn per cycle - # 0.02 stalled cycles per insn (74.96%) - 4.186752470 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 1.916084e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.966623e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.966623e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 5.577101 sec +INFO: No Floating Point Exceptions have been reported + 16,950,562,354 cycles # 3.037 GHz + 45,091,377,881 instructions # 2.66 insn per cycle + 5.582979015 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -86,34 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.396197e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.594671e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.594671e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 2.564153 sec -INFO: No Floating Point Exceptions have been reported - 8,706,948,556 cycles:u # 3.381 GHz (74.87%) - 11,021,237 stalled-cycles-frontend:u # 0.13% frontend cycles idle (74.84%) - 1,233,272,424 stalled-cycles-backend:u # 14.16% backend cycles idle (74.97%) - 26,375,882,323 instructions:u # 3.03 insn per cycle - # 0.05 stalled cycles per insn (75.12%) - 2.580435264 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 2311) (avx2: 0) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 3.424687e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.599685e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.599685e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 3.168080 sec +INFO: No Floating Point Exceptions have been reported + 9,533,110,078 cycles # 3.005 GHz + 26,250,804,820 instructions # 2.75 insn per cycle + 3.173990668 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -121,34 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW -Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 13.2.1] [inlineHel=0] [hardcodePARAM=1] -Workflow summary = CPP:MIX+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops 
fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.489478e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.904989e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.904989e+05 ) sec^-1 -MeanMatrixElemValue = ( 2.065656e+00 +- 3.350853e-03 ) GeV^0 -TOTAL : 1.794954 sec -INFO: No Floating Point Exceptions have been reported - 6,019,924,912 cycles:u # 3.333 GHz (74.79%) - 9,633,706 stalled-cycles-frontend:u # 0.16% frontend cycles idle (74.82%) - 1,768,172,037 stalled-cycles-backend:u # 29.37% backend cycles idle (75.04%) - 13,981,192,969 instructions:u # 2.32 insn per cycle - # 0.13 stalled cycles per insn (75.20%) - 1.810890060 seconds time elapsed -=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2870) (512y: 0) (512z: 0) +EvtsPerSec[Rmb+ME] (23) = ( 4.704288e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.029318e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.029318e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.333614 sec +INFO: No Floating Point Exceptions have been reported + 6,735,900,933 cycles # 2.880 GHz + 14,030,236,491 instructions # 2.08 insn per cycle + 2.339440984 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -156,16 +168,76 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 -Avg ME (F77/C++) = 2.0158359151896224 -Relative difference = 4.20720623263505e-08 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 
256bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 4.936210e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.298362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.298362e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.229547 sec +INFO: No Floating Point Exceptions have been reported + 6,391,727,814 cycles # 2.861 GHz + 13,514,455,678 instructions # 2.11 insn per cycle + 2.235403459 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. +DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= -/users/valassia/GPU2024/madgraph4gpu/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe is not supported (no avx512vl in /proc/cpuinfo) +runExe 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] +Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +FP precision = MIXED (NaN/abnormal=0, zero=0) +Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] +EvtsPerSec[Rmb+ME] (23) = ( 3.837043e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.047080e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.047080e+05 ) sec^-1 +MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 +TOTAL : 2.838742 sec +INFO: No Floating Point Exceptions have been reported + 5,600,700,385 cycles # 1.969 GHz + 9,206,380,773 instructions # 1.64 insn per cycle + 2.844839134 seconds time elapsed +=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059) +------------------------------------------------------------------------- +runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW +[ PASSED ] 4 tests. 
+DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } +INFO: No Floating Point Exceptions have been reported +DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } +INFO: No Floating Point Exceptions have been reported +------------------------------------------------------------------------- +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +Avg ME (C++/C++) = 2.015836e+00 +Avg ME (F77/C++) = 2.0158359178371690 +Relative difference = 4.0758688308634e-08 +OK (relative difference <= 5E-3) ========================================================================= TEST COMPLETED